In [ ]:
# use if you have a gpu
# !pip install cudf-cu11 --extra-index-url=https://pypi.nvidia.com
# import cudf
# %load_ext cudf.pandas
In [ ]:
def classify_value(value):
    for label, interval in occ_labels.items():
        if interval[0] <= value <= interval[1]:
            return label

occ_labels = {'Occ_Class_0': (0, 0)}
j = 1
for i in range(1, 101, 10):
    occ_labels[f'Occ_Class_{j}'] = (i, i + 9)
    j += 1
occ_labels
Out[ ]:
{'Occ_Class_0': (0, 0),
 'Occ_Class_1': (1, 10),
 'Occ_Class_2': (11, 20),
 'Occ_Class_3': (21, 30),
 'Occ_Class_4': (31, 40),
 'Occ_Class_5': (41, 50),
 'Occ_Class_6': (51, 60),
 'Occ_Class_7': (61, 70),
 'Occ_Class_8': (71, 80),
 'Occ_Class_9': (81, 90),
 'Occ_Class_10': (91, 100)}
In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import sklearn
import plotly.graph_objects as go
import plotly.express as px
from joblib import Parallel, delayed
import joblib
from collections import Counter
In [ ]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Concat.csv')
In [ ]:
df.shape # (rows, columns)
Out[ ]:
(12071, 18)
In [ ]:
df.replace('$', np.nan, inplace = True)
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df['CO (ppm)'] = df['CO (ppm)'].astype('float64')
df['NO2 (ppm)'] = df['NO2 (ppm)'].astype('float64')
df['CO2 (ppm)'] = df['CO2 (ppm)'].astype('float64')
df['Occupancy_Classified'] = df['Occupancy'].apply(classify_value).map(lambda x: int(x.split('_')[2]) - 1)
df = df.query("`Occupancy_Classified` != -1").reset_index(drop = True)
In [ ]:
df.head()
Out[ ]:
Timestamp CO (ppm) NO2 (ppm) CO2 (ppm) TVOC (ppb) PM1 (ug/m3) PM2.5 (ug/m3) PM10 (ug/m3) Temperature (C) Humidity (%) Sound (dB) Occupancy Position Room Condition Room Type Floor No. Weather Occupancy_Classified
0 2023-08-16 11:16:33 36.3 0.1 1701.0 0.0 44.0 65.0 70.0 26.1 14.1 72.1 26.0 middle ac lab 2.0 sunny 2
1 2023-08-16 11:16:38 36.1 0.1 1699.0 0.0 44.0 64.0 69.0 26.1 14.1 70.5 26.0 middle ac lab 2.0 sunny 2
2 2023-08-16 11:16:44 36.5 0.1 1695.0 0.0 44.0 64.0 69.0 26.1 14.1 77.2 26.0 middle ac lab 2.0 sunny 2
3 2023-08-16 11:16:50 34.2 0.1 1690.0 0.0 43.0 64.0 68.0 26.1 14.1 72.2 26.0 middle ac lab 2.0 sunny 2
4 2023-08-16 11:16:55 36.3 0.1 1684.0 0.0 43.0 64.0 67.0 26.1 14.0 75.3 26.0 middle ac lab 2.0 sunny 2
In [ ]:
df['Occupancy_Classified'].value_counts().plot(kind = 'bar')
Out[ ]:
<Axes: xlabel='Occupancy_Classified'>
No description has been provided for this image
In [ ]:
# Find the columns which contain strings
for column in df.columns:
    if pd.api.types.is_string_dtype(df[column]):
        print(column)
Position
Room Condition
Room Type
Weather
In [ ]:
{i: pd.api.types.is_string_dtype(df[i]) for i in df.columns}
Out[ ]:
{'Timestamp': False,
 'CO (ppm)': False,
 'NO2 (ppm)': False,
 'CO2 (ppm)': False,
 'TVOC (ppb)': False,
 'PM1 (ug/m3)': False,
 'PM2.5 (ug/m3)': False,
 'PM10 (ug/m3)': False,
 'Temperature (C)': False,
 'Humidity (%)': False,
 'Sound (dB)': False,
 'Occupancy': False,
 'Position': True,
 'Room Condition': True,
 'Room Type': True,
 'Floor No.': False,
 'Weather': True,
 'Occupancy_Classified': False}
In [ ]:
for column in df.columns:
    if pd.api.types.is_string_dtype(df[column]):
        df[column] = df[column].astype('category').cat.as_ordered()
In [ ]:
# Check missing data ratio percentage

df.isna().sum() * 100.00 / len(df)
Out[ ]:
Timestamp                0.000000
CO (ppm)                24.962204
NO2 (ppm)               24.962204
CO2 (ppm)                3.855199
TVOC (ppb)               0.000000
PM1 (ug/m3)              0.000000
PM2.5 (ug/m3)            0.000000
PM10 (ug/m3)             0.000000
Temperature (C)          0.000000
Humidity (%)             0.000000
Sound (dB)               0.000000
Occupancy                0.000000
Position                 0.000000
Room Condition           0.000000
Room Type                0.000000
Floor No.                0.000000
Weather                  0.000000
Occupancy_Classified     0.000000
dtype: float64
In [ ]:
df.isna().sum()
Out[ ]:
Timestamp                  0
CO (ppm)                2972
NO2 (ppm)               2972
CO2 (ppm)                459
TVOC (ppb)                 0
PM1 (ug/m3)                0
PM2.5 (ug/m3)              0
PM10 (ug/m3)               0
Temperature (C)            0
Humidity (%)               0
Sound (dB)                 0
Occupancy                  0
Position                   0
Room Condition             0
Room Type                  0
Floor No.                  0
Weather                    0
Occupancy_Classified       0
dtype: int64
In [ ]:
for column in df.columns:
    if pd.api.types.is_numeric_dtype(df[column]):
        print(column)
CO (ppm)
NO2 (ppm)
CO2 (ppm)
TVOC (ppb)
PM1 (ug/m3)
PM2.5 (ug/m3)
PM10 (ug/m3)
Temperature (C)
Humidity (%)
Sound (dB)
Occupancy
Floor No.
Occupancy_Classified
In [ ]:
# Check for which numeric columns have null values
for column in df.columns:
    if pd.api.types.is_numeric_dtype(df[column]) and df[column].isna().sum():
        print(column)
CO (ppm)
NO2 (ppm)
CO2 (ppm)
In [ ]:
ax = df['Occupancy_Classified'].value_counts().plot(kind = 'bar')
for container in ax.containers:
    ax.bar_label(container, fmt='%d', label_type='edge')

plt.xticks(rotation = 0);
No description has been provided for this image
In [ ]:
# Dropping all missing rows
new_df = df.dropna().reset_index(drop = True)
In [ ]:
new_df.shape
Out[ ]:
(8907, 18)
In [ ]:
ax = new_df['Occupancy_Classified'].value_counts().plot(kind = 'bar')
for container in ax.containers:
    ax.bar_label(container, fmt='%d', label_type='edge')

plt.xticks(rotation = 0);
No description has been provided for this image
In [ ]:
# Check for columns which aren't numeric or is categorical
# and print category codes
l = 0
for column in new_df.columns:
    if pd.api.types.is_categorical_dtype(new_df[column]):
        l += 1
        print(f'{column}: {dict(enumerate(df[column].cat.categories))}')
print(l)
Position: {0: 'backside', 1: 'frontside', 2: 'middle'}
Room Condition: {0: 'ac', 1: 'non ac'}
Room Type: {0: 'classroom', 1: 'lab'}
Weather: {0: 'cloudy', 1: 'overcast', 2: 'rainy', 3: 'sunny'}
4
In [ ]:
# Turn all categorical variables into numbers and fill missing
for column in new_df.columns:
    if pd.api.types.is_categorical_dtype(new_df[column]):
        # Turn categories into numbers and add +1
        new_df[column] = pd.Categorical(new_df[column]).codes + 1

ML¶

Original ML Pipeline Module¶

In [ ]:
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
import warnings
import time
warnings.filterwarnings("ignore")
np.random.seed(42)

class MultiModelEvaluator:
    def __init__(self, models):
        self.models = models
        self.model_names = list(models.keys())
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        self.metric_scores = {}

    def split_data(self, X, y, test_size=0.2, random_state=42):
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    def train_models(self):
        for model_name, model in self.models.items():
            print(f"\n================================================\n{model_name} model has started training")
            start = time.time()
            pipeline = Pipeline([
                ('scaler', StandardScaler()),
                ('classifier', model)
            ])
            pipeline.fit(self.X_train, self.y_train)
            self.models[model_name] = pipeline
            print(f"{model_name} model has ended training. Time -> {round(time.time() - start, 2)}s. Accuracy - > {(pipeline.score(self.X_test, self.y_test) * 100.00):.2f} %\n================================================\n")

    def evaluate_models(self, X_test, y_test):
        for model_name, pipeline in self.models.items():
            Train_y_pred = pipeline.predict(self.X_train)
            Train_accuracy = accuracy_score(self.y_train, Train_y_pred)
            Train_f1_macro = f1_score(self.y_train, Train_y_pred, average='macro')
            Train_f1_weighted = f1_score(self.y_train, Train_y_pred, average='weighted')
            Train_recall_macro = recall_score(self.y_train, Train_y_pred, average='macro')
            Train_recall_weighted = recall_score(self.y_train, Train_y_pred, average='weighted')
            Train_precision_macro = precision_score(self.y_train, Train_y_pred, average='macro')
            Train_precision_weighted = precision_score(self.y_train, Train_y_pred, average='weighted')
            Train_confusion = confusion_matrix(self.y_train, Train_y_pred)

            Test_y_pred = pipeline.predict(X_test)
            Test_accuracy = accuracy_score(y_test, Test_y_pred)
            Test_f1_macro = f1_score(y_test, Test_y_pred, average='macro')
            Test_f1_weighted = f1_score(y_test, Test_y_pred, average='weighted')
            Test_recall_macro = recall_score(y_test, Test_y_pred, average='macro')
            Test_recall_weighted = recall_score(y_test, Test_y_pred, average='weighted')
            Test_precision_macro = precision_score(y_test, Test_y_pred, average='macro')
            Test_precision_weighted = precision_score(y_test, Test_y_pred, average='weighted')
            Test_confusion = confusion_matrix(y_test, Test_y_pred)

            self.metric_scores[model_name] = {
                'Train Accuracy': Train_accuracy,
                'Train F1 Macro': Train_f1_macro,
                'Train F1 Weighted': Train_f1_weighted,
                'Train Recall Macro': Train_recall_macro,
                'Train Recall Weighted': Train_recall_weighted,
                'Train Precision Macro': Train_precision_macro,
                'Train Precision Weighted': Train_precision_weighted,
                'Train Confusion Matrix': Train_confusion,
                'Test Accuracy': Test_accuracy,
                'Test F1 Macro': Test_f1_macro,
                'Test F1 Weighted': Test_f1_weighted,
                'Test Recall Macro': Test_recall_macro,
                'Test Recall Weighted': Test_recall_weighted,
                'Test Precision Macro': Test_precision_macro,
                'Test Precision Weighted': Test_precision_weighted,
                'Test Confusion Matrix': Test_confusion
            }

    def get_metric_scores(self, model_name):
        return self.metric_scores.get(model_name, {})

Hyper-Tuned Pipeline Module¶

In [ ]:
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
import warnings
import numpy as np
import time
warnings.filterwarnings("ignore")
np.random.seed(42)

class MultiModelEvaluatorWithTuning:
    def __init__(self, models, param_grids, n_iter_values = {}, n_jobs_values = {}, verbose_values = {}):
        self.models = models
        self.model_names = list(models.keys())
        self.param_grids = param_grids
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        self.n_iter_values = n_iter_values
        self.n_jobs_values = n_jobs_values
        self.verbose_values = verbose_values
        self.metric_scores = {}
        self.best_params = {}

    def split_data(self, X, y, test_size=0.2, random_state=42):
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    def train_models(self):
        for model_name, model in self.models.items():
            print(f"\n================================================\n{model_name} tuned model has started training")
            start = time.time()
            pipeline = Pipeline([
                ('scaler', StandardScaler()),
                ('classifier', model)
            ])
            if model_name not in self.n_iter_values:
                search = GridSearchCV(pipeline, self.param_grids[model_name], cv=5, n_jobs = self.n_jobs_values.get(model_name, -1), verbose = self.verbose_values.get(model_name, 1))
            else:
                search = RandomizedSearchCV(pipeline, self.param_grids[model_name], cv=5, n_jobs = self.n_jobs_values.get(model_name, -1), n_iter = self.n_iter_values.get(model_name, 10), verbose = self.verbose_values.get(model_name, 1))
            search.fit(self.X_train, self.y_train)
            best_model = search.best_estimator_
            self.models[model_name] = best_model
            self.best_params[model_name] = search.best_params_
            print(f"{model_name} tuned model has ended training. Time -> {round(time.time() - start, 2)}s. Accuracy - > {(best_model.score(self.X_test, self.y_test) * 100.00):.2f} %\n================================================\n")


    def evaluate_models(self, X_test, y_test):
        for model_name, pipeline in self.models.items():
            Train_y_pred = pipeline.predict(self.X_train)
            Train_accuracy = accuracy_score(self.y_train, Train_y_pred)
            Train_f1_macro = f1_score(self.y_train, Train_y_pred, average='macro')
            Train_f1_weighted = f1_score(self.y_train, Train_y_pred, average='weighted')
            Train_recall_macro = recall_score(self.y_train, Train_y_pred, average='macro')
            Train_recall_weighted = recall_score(self.y_train, Train_y_pred, average='weighted')
            Train_precision_macro = precision_score(self.y_train, Train_y_pred, average='macro')
            Train_precision_weighted = precision_score(self.y_train, Train_y_pred, average='weighted')
            Train_confusion = confusion_matrix(self.y_train, Train_y_pred)

            Test_y_pred = pipeline.predict(X_test)
            Test_accuracy = accuracy_score(y_test, Test_y_pred)
            Test_f1_macro = f1_score(y_test, Test_y_pred, average='macro')
            Test_f1_weighted = f1_score(y_test, Test_y_pred, average='weighted')
            Test_recall_macro = recall_score(y_test, Test_y_pred, average='macro')
            Test_recall_weighted = recall_score(y_test, Test_y_pred, average='weighted')
            Test_precision_macro = precision_score(y_test, Test_y_pred, average='macro')
            Test_precision_weighted = precision_score(y_test, Test_y_pred, average='weighted')
            Test_confusion = confusion_matrix(y_test, Test_y_pred)

            self.metric_scores[model_name] = {
                'Train Accuracy': Train_accuracy,
                'Train F1 Macro': Train_f1_macro,
                'Train F1 Weighted': Train_f1_weighted,
                'Train Recall Macro': Train_recall_macro,
                'Train Recall Weighted': Train_recall_weighted,
                'Train Precision Macro': Train_precision_macro,
                'Train Precision Weighted': Train_precision_weighted,
                'Train Confusion Matrix': Train_confusion,
                'Test Accuracy': Test_accuracy,
                'Test F1 Macro': Test_f1_macro,
                'Test F1 Weighted': Test_f1_weighted,
                'Test Recall Macro': Test_recall_macro,
                'Test Recall Weighted': Test_recall_weighted,
                'Test Precision Macro': Test_precision_macro,
                'Test Precision Weighted': Test_precision_weighted,
                'Test Confusion Matrix': Test_confusion
            }

    def get_metric_scores(self, model_name):
        return self.metric_scores.get(model_name, {})

    def get_best_params(self, model_name):
        return self.best_params.get(model_name, {})
In [ ]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, f1_score
import matplotlib.pyplot as plt

class DeepModel(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(DeepModel, self).__init__()
        layers = []
        in_features = input_size

        # Input normalization
        layers.append(nn.BatchNorm1d(input_size))

        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(in_features, hidden_size))
            # Batch normalization for hidden layers
            layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.ReLU())
            in_features = hidden_size

        layers.append(nn.Linear(hidden_sizes[-1], output_size))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)

def ann_model(X, y, test_size = 0.2):
    # Evaluate the best model on the test set
    def evaluate_accuracy(model, data_loader):
        model.eval()
        total_correct = 0
        total_samples = 0
        with torch.no_grad():
            for inputs, labels in data_loader:
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total_correct += (predicted == labels).sum().item()
                total_samples += labels.size(0)
        accuracy = total_correct / total_samples
        return accuracy

    # Convert to PyTorch tensors
    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.long)

    # Convert data to PyTorch dataset
    dataset = data.TensorDataset(X, y)

    # Split the data into train, dev, and test sets
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=test_size, random_state=42)
    X_dev, X_test, y_dev, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

    # Create PyTorch DataLoader for each set
    train_loader = data.DataLoader(data.TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
    dev_loader = data.DataLoader(data.TensorDataset(X_dev, y_dev), batch_size=32, shuffle=False)
    test_loader = data.DataLoader(data.TensorDataset(X_test, y_test), batch_size=32, shuffle=False)

    # Define hyperparameters to try
    learning_rate = 0.01
    num_epochs = 50

    # Function to train and evaluate the model
    def train_and_evaluate(model, learning_rate):
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)

        # Lists to store convergence data
        train_losses = []
        dev_losses = []
        train_accuracies = []
        dev_accuracies = []

        # Training loop
        for epoch in range(num_epochs):
            model.train()
            for inputs, labels in train_loader:
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            # Compute and store losses and accuracies
            train_loss = loss.item()
            train_losses.append(train_loss)
            train_acc = evaluate_accuracy(model, train_loader)
            train_accuracies.append(train_acc)
            dev_acc = evaluate_accuracy(model, dev_loader)
            dev_accuracies.append(dev_acc)
            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}, Dev Accuracy: {dev_acc:.4f}")

        # Plot convergence graph
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plt.plot(range(1, num_epochs + 1), train_losses, label='Train Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.subplot(1, 2, 2)
        plt.plot(range(1, num_epochs + 1), train_accuracies, label='Train Accuracy')
        plt.plot(range(1, num_epochs + 1), dev_accuracies, label='Dev Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.tight_layout()
        plt.show()

        # Evaluation
        model.eval()
        total_correct = 0
        total_samples = 0
        predicted_labels = []
        with torch.no_grad():
            for inputs, labels in dev_loader:
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total_correct += (predicted == labels).sum().item()
                total_samples += labels.size(0)
                predicted_labels.extend(predicted.tolist())

        accuracy = total_correct / total_samples

        # Classification report and F1-score
        print("Classification Report:")
        print(classification_report(y_dev, predicted_labels))
        f1 = f1_score(y_dev, predicted_labels, average='weighted')
        print(f"F1-Score: {f1:.4f}")

        return accuracy

    # Define different configurations for hidden layers and units
    hidden_layers_configs = [
        [16, 8],  # 2 hidden layers with 16 and 8 units
        [32, 16, 8],  # 3 hidden layers with 32, 16, and 8 units
        [64, 32, 16, 8]  # 4 hidden layers with 64, 32, 16, and 8 units
    ]

    # Train and evaluate models with different configurations
    best_accuracy = 0.0
    best_model = None
    for hidden_layers in hidden_layers_configs:
        model = DeepModel(input_size=X.shape[1], hidden_sizes=hidden_layers, output_size=y.shape[0])
        accuracy = train_and_evaluate(model, learning_rate)
        print(f"Hidden layers configuration: {hidden_layers}, Accuracy: {accuracy}")
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model

    print(f"Best model hidden layers configuration: {best_model.layers}, Best accuracy: {best_accuracy}")
    test_accuracy = evaluate_accuracy(best_model, test_loader)
    print(f"Test Accuracy: {test_accuracy}")
    return best_model
In [ ]:
def original_ml_pipeline_obj(x, y, test_size = 0.2):
    # Define the machine learning models
    models = {
        'LogisticRegression': LogisticRegression(solver = 'liblinear'),
        'KNN': KNeighborsClassifier(),
        'SVM': SVC(kernel = 'rbf', gamma = 0.1, C = 1.0),
        'Linear SVM': SVC(kernel="linear", C=0.025, random_state=42),
        'DecisionTree': DecisionTreeClassifier(random_state = 42),
        'RandomForest': RandomForestClassifier(n_estimators = 1000, random_state = 42),
        'XGB': XGBClassifier(use_label_encoder = False),
        'LGBM': LGBMClassifier(n_estimators = 1000, random_state = 42, n_jobs = -1, verbosity = -1),
        'GradientBoosting': GradientBoostingClassifier(),
        'MLP Neural Net': MLPClassifier(alpha=1, max_iter=1000, random_state=42),
        "AdaBoost": AdaBoostClassifier(random_state=42),
        "Naive Bayes": GaussianNB(),
        "QDA": QuadraticDiscriminantAnalysis(),
    }

    # Initialize the MultiModelEvaluator
    evaluator = MultiModelEvaluator(models)

    # Split the data into training and testing sets
    evaluator.split_data(x, y, test_size = test_size)

    # Train the models
    evaluator.train_models()

    # Evaluate the models
    evaluator.evaluate_models(evaluator.X_test, evaluator.y_test)

    # Get metric scores for a specific model
    # model_name = 'RandomForest'
    # scores = evaluator.get_metric_scores(model_name)
    # print(f'Metric Scores for Model {model_name}:')
    # for metric, score in scores.items():
    #     print(f'{metric}: {score}')
    return evaluator
In [ ]:
def hyper_tuned_ml_pipeline_obj(x, y, test_size = 0.2):
    # Define the machine learning models
    models = {
        'LogisticRegression': LogisticRegression(solver = 'liblinear'),
        'KNN': KNeighborsClassifier(),
        'SVM': SVC(kernel = 'rbf', gamma = 0.1, C = 1.0),
        'Linear SVM': SVC(kernel="linear", C=0.025, random_state=42),
        'DecisionTree': DecisionTreeClassifier(random_state = 42),
        'RandomForest': RandomForestClassifier(n_estimators = 1000, random_state = 42),
        'XGB': XGBClassifier(use_label_encoder = False),
        'LGBM': LGBMClassifier(n_estimators = 1000, random_state = 42, n_jobs = -1, verbosity = -1),
        'GradientBoosting': GradientBoostingClassifier(),
        'MLP Neural Net': MLPClassifier(alpha=1, max_iter=1000, random_state=42),
        "AdaBoost": AdaBoostClassifier(random_state=42),
        "Naive Bayes": GaussianNB(),
        "QDA": QuadraticDiscriminantAnalysis(),
    }

    n_jobs_values = {

    }

    verbose_values = {

    }

    n_iter_values = {
        'RandomForest': 10,
        'XGB': 150,
        'LGBM': 150,
        'MLP Neural Net': 50,
    }

    # Define parameter grids for hyperparameter tuning
    param_grids = {
        'RandomForest': {'classifier__n_estimators': np.arange(100, 3000, 100), 'classifier__max_features': ['auto', 'sqrt'], 'classifier__max_depth': [2, 3, 5, 10, 15, None], 'classifier__min_samples_split': [2, 5, 10], 'classifier__min_samples_leaf': [1, 2, 4]},
        'DecisionTree': {"classifier__criterion":("gini", "entropy"), "classifier__splitter":("best", "random"), "classifier__max_depth":np.arange(1, 21), "classifier__min_samples_split":[2, 3, 4], "classifier__min_samples_leaf":np.arange(1, 21)},
        'GradientBoosting': {'classifier__n_estimators': [50, 100, 200]},
        'KNN': {'classifier__n_neighbors': np.arange(1, 21)},
        'XGB': {'classifier__max_depth': np.arange(1, 21), 'classifier__learning_rate': np.arange(0, 1.1, 0.1)},
        'SVM': {'classifier__C': [0.1, 0.5, 1, 2, 5, 10, 20], 'classifier__kernel': ['rbf'], "classifier__gamma": [0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1]},
        'LogisticRegression': {'classifier__C': np.logspace(-4, 4, 20), "classifier__solver": ["liblinear"], 'classifier__penalty': ['l1', 'l2']},
        'LGBM': {'classifier__learning_rate': np.logspace(np.log(0.01), np.log(1), num = 500, base=3), 'classifier__max_depth': np.arange(5, 15), 'classifier__n_estimators': np.arange(5, 35), 'classifier__num_leaves': np.arange(5, 50), 'classifier__boosting_type': ['gbdt', 'dart'], 'classifier__colsample_bytree': np.linspace(0.6, 1, 500),'classifier__reg_lambda': np.linspace(0, 1, 500)},
        'AdaBoost' : {'classifier__n_estimators': [10, 50, 100, 500], 'classifier__learning_rate': [0.0001, 0.001, 0.01, 0.1, 1.0], 'classifier__random_state': [42]},
        'Naive Bayes': {'classifier__var_smoothing': np.logspace(0,-9, num=100)},
        'MLP Neural Net': {'classifier__hidden_layer_sizes': [(150,100,50), (120,80,40), (100,50,30)], 'classifier__max_iter': [50, 100, 150],
                           'classifier__activation': ['tanh', 'relu'], 'classifier__solver': ['lbfgs', 'adam'], 'classifier__alpha': [0.0001, 0.05],
                           'classifier__learning_rate': ['constant','adaptive'], 'classifier__random_state': [42]},
        'QDA': {'classifier__reg_param': [0.1, 0.2, 0.3, 0.4, 0.5]},
        'Linear SVM': {'classifier__C': [0.1, 1, 10], 'classifier__kernel': ['linear']}
        }

    # Initialize the MultiModelEvaluatorWithTuning
    evaluator = MultiModelEvaluatorWithTuning(models, param_grids, n_iter_values = n_iter_values, n_jobs_values = n_jobs_values, verbose_values = verbose_values)

    # Split the data into training and testing sets
    evaluator.split_data(x, y, test_size = test_size)

    # Train the models with hyperparameter tuning
    evaluator.train_models()

    # Evaluate the models
    evaluator.evaluate_models(evaluator.X_test, evaluator.y_test)

    # Get metric scores for a specific model
    # model_name = 'RandomForest'
    # scores = evaluator_tuned.get_metric_scores(model_name)
    # print(f'Metric Scores for Model {model_name}:')
    # for metric, score in scores.items():
    #     print(f'{metric}: {score}')

    return evaluator
In [ ]:
def evaluate_result(evaluator):
    result = {'Model Name': [], 'Train Accuracy': [], 'Train F1 Macro': [], 'Train F1 Weighted': [], 'Train Recall Macro': [],
              'Train Recall Weighted': [], 'Train Precision Macro': [], 'Train Precision Weighted': [],
              'Test Accuracy': [], 'Test F1 Macro': [], 'Test F1 Weighted': [], 'Test Recall Macro': [],
              'Test Recall Weighted': [], 'Test Precision Macro': [], 'Test Precision Weighted': []
              }

    for model_name in evaluator.metric_scores:
        result['Model Name'].append(model_name)
        for metric_name in list(result.keys())[1:]:
            result[metric_name].append(round(evaluator.metric_scores[model_name][metric_name] * 100.00, 2))

    result = pd.DataFrame(result).T
    result.rename(columns=result.iloc[0], inplace = True)
    result.drop(result.index[0], inplace = True)
    result.index = pd.MultiIndex.from_tuples(
        [('Train', metric.replace('Train ', '')) if i < 7 else ('Test', metric.replace('Test ', '')) for i, metric in enumerate(result.index)],
        names=['', 'Metrics'])
    result.index.names = ['',  'Metrics']
    return result
In [ ]:
def evaluate_ann(model, X_train, X_test, y_train, y_test):
    model.eval()
    with torch.no_grad():
        Train_y_pred = torch.max(model(torch.tensor(X_train, dtype=torch.float32)), 1)[1].numpy()
    Train_accuracy = accuracy_score(y_train, Train_y_pred)
    Train_f1_macro = f1_score(y_train, Train_y_pred, average='macro')
    Train_f1_weighted = f1_score(y_train, Train_y_pred, average='weighted')
    Train_recall_macro = recall_score(y_train, Train_y_pred, average='macro')
    Train_recall_weighted = recall_score(y_train, Train_y_pred, average='weighted')
    Train_precision_macro = precision_score(y_train, Train_y_pred, average='macro')
    Train_precision_weighted = precision_score(y_train, Train_y_pred, average='weighted')
    Train_confusion = confusion_matrix(y_train, Train_y_pred)

    model.eval()
    with torch.no_grad():
        Test_y_pred = torch.max(model(torch.tensor(X_test, dtype=torch.float32)), 1)[1].numpy()
    Test_accuracy = accuracy_score(y_test, Test_y_pred)
    Test_f1_macro = f1_score(y_test, Test_y_pred, average='macro')
    Test_f1_weighted = f1_score(y_test, Test_y_pred, average='weighted')
    Test_recall_macro = recall_score(y_test, Test_y_pred, average='macro')
    Test_recall_weighted = recall_score(y_test, Test_y_pred, average='weighted')
    Test_precision_macro = precision_score(y_test, Test_y_pred, average='macro')
    Test_precision_weighted = precision_score(y_test, Test_y_pred, average='weighted')
    Test_confusion = confusion_matrix(y_test, Test_y_pred)

    metrics = {
                    'Train Accuracy': Train_accuracy,
                    'Train F1 Macro': Train_f1_macro,
                    'Train F1 Weighted': Train_f1_weighted,
                    'Train Recall Macro': Train_recall_macro,
                    'Train Recall Weighted': Train_recall_weighted,
                    'Train Precision Macro': Train_precision_macro,
                    'Train Precision Weighted': Train_precision_weighted,
                    'Train Confusion Matrix': Train_confusion,
                    'Test Accuracy': Test_accuracy,
                    'Test F1 Macro': Test_f1_macro,
                    'Test F1 Weighted': Test_f1_weighted,
                    'Test Recall Macro': Test_recall_macro,
                    'Test Recall Weighted': Test_recall_weighted,
                    'Test Precision Macro': Test_precision_macro,
                    'Test Precision Weighted': Test_precision_weighted,
                    'Test Confusion Matrix': Test_confusion
                }

    result = {'Model Name': [], 'Train Accuracy': [], 'Train F1 Macro': [], 'Train F1 Weighted': [], 'Train Recall Macro': [],
              'Train Recall Weighted': [], 'Train Precision Macro': [], 'Train Precision Weighted': [],
              'Test Accuracy': [], 'Test F1 Macro': [], 'Test F1 Weighted': [], 'Test Recall Macro': [],
              'Test Recall Weighted': [], 'Test Precision Macro': [], 'Test Precision Weighted': []
              }
    result['Model Name'].append('ANN')
    for metric_name in list(result.keys())[1:]:
        result[metric_name].append(round(metrics[metric_name] * 100.00, 2))

    result = pd.DataFrame(result).T
    result.rename(columns=result.iloc[0], inplace = True)
    result.drop(result.index[0], inplace = True)
    result.index = pd.MultiIndex.from_tuples(
        [('Train', metric.replace('Train ', '')) if i < 7 else ('Test', metric.replace('Test ', '')) for i, metric in enumerate(result.index)],
        names=['', 'Metrics'])
    result.index.names = ['',  'Metrics']
    return metrics, result
In [ ]:
def plot_feature_importances(models, model_names, feature_names):
    """
    Plot feature importances for a list of machine learning models.

    Parameters:
    - models (list): List of trained models.
    - model_names (list): Names of the models for labeling in the plot.
    - feature_names (list): Names of the features for labeling in the plot.

    Returns:
    - None
    """
    num_models = len(models)
    num_features = len(feature_names)

    for i in range(num_models):
        plt.figure(figsize=(10, 6))

        if isinstance(models[i], DecisionTreeClassifier) or isinstance(models[i], RandomForestClassifier):
            importances = models[i].feature_importances_
        elif isinstance(models[i], XGBClassifier) or isinstance(models[i], AdaBoostClassifier):
            importances = models[i].feature_importances_
        elif isinstance(models[i], LGBMClassifier):
            importances = models[i].feature_importances_
        elif isinstance(models[i], LogisticRegression):
            importances = np.abs(models[i].coef_[0])
        elif isinstance(models[i], MLPClassifier):
            importances = [np.sum(np.abs(layer), axis=1) for layer in [layer / np.linalg.norm(layer, ord=2, axis=0)
                            for layer in models[i].coefs_]][0]
        # elif isinstance(models[i], KNeighborsClassifier) or isinstance(models[i], SVC):
        #     importances = [1] * num_features  # KNN and SVM doesn't have feature importance, for instance let's say every feature importance is equal
        elif isinstance(models[i], GradientBoostingClassifier):
            importances = models[i].feature_importances_
        else:
            pass  # KNN, SVM, Naive Bayes and QDA doesn't have feature importance
            # raise ValueError(f"Unsupported model type: {type(models[i])}")

        # Sort feature importances in descending order
        sorted_indices = np.argsort(importances)[::-1]
        sorted_importances = [importances[idx] for idx in sorted_indices]  # Convert to a list of values
        sorted_feature_names = [feature_names[idx] for idx in sorted_indices]

        plt.bar(range(num_features), sorted_importances, tick_label=sorted_feature_names)
        plt.title(f'Feature Importances for {model_names[i]}')
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()
In [ ]:
# for feature scaling
from sklearn.preprocessing import StandardScaler
st_x = StandardScaler()
  • Position: {1: 'backside', 2: 'frontside', 3: 'middle'}
  • Room Condition: {1: 'ac', 2: 'non ac'}
  • Room Type: {1: 'classroom', 2: 'lab'}
  • Weather: {1: 'cloudy', 2: 'overcast', 3: 'rainy', 4: 'sunny'}
  • Lab has always ac
  • Classroom has ac and non-ac

AC Lab ML Machine Learning Model¶

In [ ]:
# Preparing data for ML
df_tmp = new_df.query("`Room Type` == 2")[['CO2 (ppm)', 'PM1 (ug/m3)', 'PM2.5 (ug/m3)', 'PM10 (ug/m3)', 'Temperature (C)', 'Humidity (%)', 'Position', 'Room Condition', 'Room Type', 'Floor No.', 'Weather', 'Occupancy_Classified']].drop(["Room Condition", "Room Type"], axis = 1)
In [ ]:
# Split data into x and y
ac_lab_x = df_tmp.drop('Occupancy_Classified', axis = 1)
ac_lab_y = df_tmp['Occupancy_Classified'].values  # converting to numpy array

# Scaling input variables, output variables doens't required as we are just predicting discrete outcomes
ac_lab_x = st_x.fit_transform(ac_lab_x)
In [ ]:
# @title Experimenting our dataset with Dimension Reduction Techniques (t-SNE)

!pip install openTSNE
from openTSNE import TSNE
n = 4

ac_lab_x = df_tmp.drop('Occupancy_Classified', axis = 1).to_numpy()
ac_lab_y = df_tmp['Occupancy_Classified'].values  # converting to numpy array

model = TSNE(
    n_components = n,
    perplexity=25,
    metric="euclidean",
    n_jobs=-1,
    random_state=42,
    verbose=True,
)
tsne_data = model.fit(ac_lab_x)
tsne_data = np.vstack((tsne_data.T, ac_lab_y)).T
tsne_df = pd.DataFrame(data = tsne_data,
     columns = [f'Dim_{i}' for i in range(1, n + 1)] + ['label'])
sns.pairplot(tsne_df, hue='label', palette="bright")
plt.show()


tsne_df.corr()['label']


# evaluator_ac_lab = original_ml_pipeline_obj(tsne_df.drop('label', axis = 1), tsne_df['label'], test_size = 0.4)


# result_ac_lab = evaluate_result(evaluator_ac_lab)
# result_ac_lab


# metric_to_show = 'Accuracy'
# ax = result_ac_lab.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
# plt.title(f'Train and Test {metric_to_show} for Different Models')
# plt.xlabel('Model')
# plt.ylabel(metric_to_show)
# plt.xticks(np.arange(len(result_ac_lab.columns.to_numpy())), result_ac_lab.columns.to_numpy(), rotation=90)
# plt.legend(loc='best')
# plt.grid(True)
# plt.show()


# ann_model_ac_lab = ann_model(tsne_data[:, :3], tsne_data[:, 3], test_size = 0.4)
# evaluate_ann(ann_model_ac_lab, *train_test_split(tsne_data[:, :3], tsne_data[:, 3], test_size=0.4, random_state=42))[1]
In [ ]:
evaluator_ac_lab = original_ml_pipeline_obj(ac_lab_x, ac_lab_y, test_size = 0.4)
================================================
LogisticRegression model has started training
LogisticRegression model has ended training. Time -> 0.04s. Accuracy - > 76.65 %
================================================


================================================
KNN model has started training
KNN model has ended training. Time -> 0.01s. Accuracy - > 93.99 %
================================================


================================================
SVM model has started training
SVM model has ended training. Time -> 0.1s. Accuracy - > 83.67 %
================================================


================================================
Linear SVM model has started training
Linear SVM model has ended training. Time -> 0.11s. Accuracy - > 73.94 %
================================================


================================================
DecisionTree model has started training
DecisionTree model has ended training. Time -> 0.01s. Accuracy - > 97.29 %
================================================


================================================
RandomForest model has started training
RandomForest model has ended training. Time -> 3.26s. Accuracy - > 98.41 %
================================================


================================================
XGB model has started training
XGB model has ended training. Time -> 1.98s. Accuracy - > 98.17 %
================================================


================================================
LGBM model has started training
LGBM model has ended training. Time -> 4.11s. Accuracy - > 97.70 %
================================================


================================================
GradientBoosting model has started training
GradientBoosting model has ended training. Time -> 2.81s. Accuracy - > 97.88 %
================================================


================================================
MLP Neural Net model has started training
MLP Neural Net model has ended training. Time -> 2.69s. Accuracy - > 86.50 %
================================================


================================================
AdaBoost model has started training
AdaBoost model has ended training. Time -> 0.21s. Accuracy - > 43.51 %
================================================


================================================
Naive Bayes model has started training
Naive Bayes model has ended training. Time -> 0.0s. Accuracy - > 57.49 %
================================================


================================================
QDA model has started training
QDA model has ended training. Time -> 0.03s. Accuracy - > 34.91 %
================================================

In [ ]:
result_ac_lab = evaluate_result(evaluator_ac_lab)
result_ac_lab
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA
Metrics
Train Accuracy 75.18 97.29 83.32 73.25 100.0 100.0 99.92 99.84 100.0 86.31 42.25 56.45 33.4
F1 Macro 60.59 96.02 75.55 53.63 100.0 100.0 99.79 99.73 100.0 79.19 22.97 41.23 7.15
F1 Weighted 72.13 97.28 82.42 69.2 100.0 100.0 99.92 99.84 100.0 85.64 31.42 50.4 16.72
Recall Macro 60.89 95.45 72.76 56.39 100.0 100.0 99.79 99.88 100.0 77.41 33.63 50.38 14.29
Recall Weighted 75.18 97.29 83.32 73.25 100.0 100.0 99.92 99.84 100.0 86.31 42.25 56.45 33.4
Precision Macro 67.28 96.65 86.75 52.53 100.0 100.0 99.79 99.59 100.0 85.67 18.26 58.89 4.77
Precision Weighted 71.66 97.29 84.91 66.94 100.0 100.0 99.92 99.84 100.0 87.01 26.59 64.75 11.15
Test Accuracy 76.65 93.99 83.67 73.94 97.29 98.41 98.17 97.7 97.88 86.5 43.51 57.49 34.91
F1 Macro 61.96 90.37 74.4 53.66 95.39 97.78 97.11 96.68 96.58 78.88 23.29 41.67 7.39
F1 Weighted 73.78 93.96 82.78 70.18 97.29 98.41 98.17 97.7 97.87 85.87 33.21 52.43 18.06
Recall Macro 61.53 89.84 71.37 55.85 95.3 97.68 96.87 96.45 96.0 77.28 33.61 51.34 14.29
Recall Weighted 76.65 93.99 83.67 73.94 97.29 98.41 98.17 97.7 97.88 86.5 43.51 57.49 34.91
Precision Macro 68.13 90.96 86.34 53.12 95.54 97.91 97.38 96.94 97.26 84.01 18.84 58.82 4.99
Precision Weighted 72.93 93.96 85.18 68.18 97.32 98.42 98.19 97.72 97.89 86.93 28.76 66.36 12.18
In [ ]:
metric_to_show = 'Accuracy'

ax = result_ac_lab.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_ac_lab.columns.to_numpy())), result_ac_lab.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
evaluator_ac_lab_hyper_tuned = hyper_tuned_ml_pipeline_obj(ac_lab_x, ac_lab_y, test_size = 0.4)
================================================
LogisticRegression tuned model has started training
Fitting 5 folds for each of 40 candidates, totalling 200 fits
LogisticRegression tuned model has ended training. Time -> 109.5s. Accuracy - > 78.24 %
================================================


================================================
KNN tuned model has started training
Fitting 5 folds for each of 20 candidates, totalling 100 fits
KNN tuned model has ended training. Time -> 5.01s. Accuracy - > 96.52 %
================================================


================================================
SVM tuned model has started training
Fitting 5 folds for each of 49 candidates, totalling 245 fits
SVM tuned model has ended training. Time -> 28.6s. Accuracy - > 93.93 %
================================================


================================================
Linear SVM tuned model has started training
Fitting 5 folds for each of 3 candidates, totalling 15 fits
Linear SVM tuned model has ended training. Time -> 2.17s. Accuracy - > 86.32 %
================================================


================================================
DecisionTree tuned model has started training
Fitting 5 folds for each of 4800 candidates, totalling 24000 fits
DecisionTree tuned model has ended training. Time -> 138.99s. Accuracy - > 96.82 %
================================================


================================================
RandomForest tuned model has started training
Fitting 5 folds for each of 10 candidates, totalling 50 fits
RandomForest tuned model has ended training. Time -> 197.37s. Accuracy - > 97.88 %
================================================


================================================
XGB tuned model has started training
Fitting 5 folds for each of 150 candidates, totalling 750 fits
XGB tuned model has ended training. Time -> 185.72s. Accuracy - > 98.23 %
================================================


================================================
LGBM tuned model has started training
Fitting 5 folds for each of 150 candidates, totalling 750 fits
LGBM tuned model has ended training. Time -> 172.55s. Accuracy - > 97.88 %
================================================


================================================
GradientBoosting tuned model has started training
Fitting 5 folds for each of 3 candidates, totalling 15 fits
GradientBoosting tuned model has ended training. Time -> 46.77s. Accuracy - > 97.82 %
================================================


================================================
MLP Neural Net tuned model has started training
Fitting 5 folds for each of 50 candidates, totalling 250 fits
MLP Neural Net tuned model has ended training. Time -> 786.21s. Accuracy - > 94.93 %
================================================


================================================
AdaBoost tuned model has started training
Fitting 5 folds for each of 20 candidates, totalling 100 fits
AdaBoost tuned model has ended training. Time -> 59.32s. Accuracy - > 71.17 %
================================================


================================================
Naive Bayes tuned model has started training
Fitting 5 folds for each of 100 candidates, totalling 500 fits
Naive Bayes tuned model has ended training. Time -> 2.17s. Accuracy - > 64.45 %
================================================


================================================
QDA tuned model has started training
Fitting 5 folds for each of 5 candidates, totalling 25 fits
QDA tuned model has ended training. Time -> 0.21s. Accuracy - > 69.99 %
================================================

In [ ]:
result_ac_lab_hyper_tuned = evaluate_result(evaluator_ac_lab_hyper_tuned)
result_ac_lab_hyper_tuned
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA
Metrics
Train Accuracy 77.62 100.0 95.83 86.15 100.0 99.57 99.92 99.84 100.0 97.25 69.79 65.38 70.02
F1 Macro 70.58 100.0 94.37 80.13 100.0 99.4 99.79 99.73 100.0 95.69 53.34 60.57 59.54
F1 Weighted 75.69 100.0 95.82 85.09 100.0 99.57 99.92 99.84 100.0 97.23 66.48 65.16 68.01
Recall Macro 70.03 100.0 93.44 77.99 100.0 99.34 99.79 99.88 100.0 94.71 55.56 64.78 63.49
Recall Weighted 77.62 100.0 95.83 86.15 100.0 99.57 99.92 99.84 100.0 97.25 69.79 65.38 70.02
Precision Macro 75.52 100.0 95.51 88.78 100.0 99.47 99.79 99.59 100.0 96.87 58.95 65.15 59.2
Precision Weighted 76.01 100.0 95.89 86.75 100.0 99.57 99.92 99.84 100.0 97.29 67.37 68.9 67.77
Test Accuracy 78.24 96.52 93.93 86.32 96.82 97.88 98.23 97.88 97.82 94.93 71.17 64.45 69.99
F1 Macro 69.87 95.01 91.52 80.41 94.21 97.41 97.22 97.05 96.53 91.72 54.42 58.22 58.62
F1 Weighted 76.42 96.52 93.96 85.46 96.79 97.88 98.23 97.88 97.81 94.88 68.36 64.65 68.33
Recall Macro 68.92 95.14 91.13 78.23 93.37 97.45 96.96 96.88 95.93 90.65 55.72 63.22 63.06
Recall Weighted 78.24 96.52 93.93 86.32 96.82 97.88 98.23 97.88 97.82 94.93 71.17 64.45 69.99
Precision Macro 75.92 94.89 92.09 88.26 95.27 97.41 97.51 97.24 97.24 92.97 63.21 62.25 59.02
Precision Weighted 77.03 96.53 94.05 87.04 96.81 97.91 98.24 97.89 97.84 94.9 70.74 68.73 68.68
In [ ]:
metric_to_show = 'Accuracy'

result_ac_lab_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_ac_lab_hyper_tuned.columns.to_numpy())), result_ac_lab_hyper_tuned.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
ann_model_ac_lab = ann_model(ac_lab_x, ac_lab_y, test_size = 0.4)
Epoch 1/50, Train Loss: 7.5679, Train Accuracy: 0.3088, Dev Accuracy: 0.3090
Epoch 2/50, Train Loss: 6.4552, Train Accuracy: 0.3812, Dev Accuracy: 0.3620
Epoch 3/50, Train Loss: 5.2430, Train Accuracy: 0.4555, Dev Accuracy: 0.4528
Epoch 4/50, Train Loss: 3.7336, Train Accuracy: 0.4500, Dev Accuracy: 0.4281
Epoch 5/50, Train Loss: 3.4900, Train Accuracy: 0.4772, Dev Accuracy: 0.4505
Epoch 6/50, Train Loss: 3.0107, Train Accuracy: 0.4941, Dev Accuracy: 0.4682
Epoch 7/50, Train Loss: 1.8069, Train Accuracy: 0.6424, Dev Accuracy: 0.6533
Epoch 8/50, Train Loss: 1.0936, Train Accuracy: 0.6900, Dev Accuracy: 0.6969
Epoch 9/50, Train Loss: 0.9546, Train Accuracy: 0.7781, Dev Accuracy: 0.7689
Epoch 10/50, Train Loss: 2.1245, Train Accuracy: 0.7789, Dev Accuracy: 0.7748
Epoch 11/50, Train Loss: 1.7963, Train Accuracy: 0.8029, Dev Accuracy: 0.8031
Epoch 12/50, Train Loss: 0.5580, Train Accuracy: 0.7939, Dev Accuracy: 0.7830
Epoch 13/50, Train Loss: 0.5155, Train Accuracy: 0.7974, Dev Accuracy: 0.7913
Epoch 14/50, Train Loss: 0.9153, Train Accuracy: 0.8151, Dev Accuracy: 0.8160
Epoch 15/50, Train Loss: 0.9304, Train Accuracy: 0.8006, Dev Accuracy: 0.7877
Epoch 16/50, Train Loss: 0.2263, Train Accuracy: 0.8045, Dev Accuracy: 0.7983
Epoch 17/50, Train Loss: 0.5137, Train Accuracy: 0.8017, Dev Accuracy: 0.8007
Epoch 18/50, Train Loss: 0.9767, Train Accuracy: 0.8175, Dev Accuracy: 0.8160
Epoch 19/50, Train Loss: 0.6474, Train Accuracy: 0.8124, Dev Accuracy: 0.8125
Epoch 20/50, Train Loss: 0.6372, Train Accuracy: 0.8403, Dev Accuracy: 0.8384
Epoch 21/50, Train Loss: 0.5639, Train Accuracy: 0.8183, Dev Accuracy: 0.8231
Epoch 22/50, Train Loss: 0.4555, Train Accuracy: 0.8297, Dev Accuracy: 0.8208
Epoch 23/50, Train Loss: 0.6009, Train Accuracy: 0.8084, Dev Accuracy: 0.7983
Epoch 24/50, Train Loss: 1.1390, Train Accuracy: 0.8143, Dev Accuracy: 0.8160
Epoch 25/50, Train Loss: 0.5916, Train Accuracy: 0.8139, Dev Accuracy: 0.8184
Epoch 26/50, Train Loss: 0.7654, Train Accuracy: 0.8269, Dev Accuracy: 0.8172
Epoch 27/50, Train Loss: 1.2820, Train Accuracy: 0.8348, Dev Accuracy: 0.8184
Epoch 28/50, Train Loss: 0.5775, Train Accuracy: 0.8289, Dev Accuracy: 0.8219
Epoch 29/50, Train Loss: 1.0680, Train Accuracy: 0.8206, Dev Accuracy: 0.8101
Epoch 30/50, Train Loss: 0.7345, Train Accuracy: 0.8245, Dev Accuracy: 0.8149
Epoch 31/50, Train Loss: 0.4530, Train Accuracy: 0.8395, Dev Accuracy: 0.8208
Epoch 32/50, Train Loss: 0.3788, Train Accuracy: 0.8230, Dev Accuracy: 0.8290
Epoch 33/50, Train Loss: 0.8172, Train Accuracy: 0.8289, Dev Accuracy: 0.8267
Epoch 34/50, Train Loss: 0.5156, Train Accuracy: 0.8360, Dev Accuracy: 0.8219
Epoch 35/50, Train Loss: 0.4037, Train Accuracy: 0.8344, Dev Accuracy: 0.8243
Epoch 36/50, Train Loss: 0.8318, Train Accuracy: 0.8183, Dev Accuracy: 0.8219
Epoch 37/50, Train Loss: 0.4988, Train Accuracy: 0.8285, Dev Accuracy: 0.8396
Epoch 38/50, Train Loss: 0.4708, Train Accuracy: 0.8242, Dev Accuracy: 0.8172
Epoch 39/50, Train Loss: 0.9334, Train Accuracy: 0.8407, Dev Accuracy: 0.8337
Epoch 40/50, Train Loss: 0.7260, Train Accuracy: 0.8423, Dev Accuracy: 0.8337
Epoch 41/50, Train Loss: 0.3315, Train Accuracy: 0.8356, Dev Accuracy: 0.8325
Epoch 42/50, Train Loss: 0.7459, Train Accuracy: 0.8308, Dev Accuracy: 0.8243
Epoch 43/50, Train Loss: 0.4402, Train Accuracy: 0.8375, Dev Accuracy: 0.8420
Epoch 44/50, Train Loss: 0.9257, Train Accuracy: 0.8340, Dev Accuracy: 0.8314
Epoch 45/50, Train Loss: 0.7405, Train Accuracy: 0.8238, Dev Accuracy: 0.8325
Epoch 46/50, Train Loss: 0.6862, Train Accuracy: 0.8560, Dev Accuracy: 0.8573
Epoch 47/50, Train Loss: 1.3168, Train Accuracy: 0.8438, Dev Accuracy: 0.8255
Epoch 48/50, Train Loss: 0.2608, Train Accuracy: 0.8450, Dev Accuracy: 0.8373
Epoch 49/50, Train Loss: 0.5127, Train Accuracy: 0.8301, Dev Accuracy: 0.8290
Epoch 50/50, Train Loss: 1.0933, Train Accuracy: 0.8320, Dev Accuracy: 0.8196
No description has been provided for this image
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.96      0.90       292
           1       0.78      0.79      0.78       110
           2       0.82      0.85      0.83       152
           3       0.95      0.81      0.87        73
           4       0.83      0.31      0.45        49
           5       0.73      0.84      0.78       148
           6       0.00      0.00      0.00        24

    accuracy                           0.82       848
   macro avg       0.71      0.65      0.66       848
weighted avg       0.80      0.82      0.80       848

F1-Score: 0.8002
Hidden layers configuration: [16, 8], Accuracy: 0.8195754716981132
Epoch 1/50, Train Loss: 7.4726, Train Accuracy: 0.3006, Dev Accuracy: 0.2771
Epoch 2/50, Train Loss: 6.1961, Train Accuracy: 0.5268, Dev Accuracy: 0.5024
Epoch 3/50, Train Loss: 4.4576, Train Accuracy: 0.5732, Dev Accuracy: 0.5401
Epoch 4/50, Train Loss: 2.5779, Train Accuracy: 0.5814, Dev Accuracy: 0.5531
Epoch 5/50, Train Loss: 2.0403, Train Accuracy: 0.5779, Dev Accuracy: 0.5495
Epoch 6/50, Train Loss: 2.7736, Train Accuracy: 0.6003, Dev Accuracy: 0.5696
Epoch 7/50, Train Loss: 2.2470, Train Accuracy: 0.6920, Dev Accuracy: 0.6781
Epoch 8/50, Train Loss: 1.6335, Train Accuracy: 0.7766, Dev Accuracy: 0.7818
Epoch 9/50, Train Loss: 1.1709, Train Accuracy: 0.7832, Dev Accuracy: 0.7889
Epoch 10/50, Train Loss: 1.7481, Train Accuracy: 0.8222, Dev Accuracy: 0.8184
Epoch 11/50, Train Loss: 0.3060, Train Accuracy: 0.8124, Dev Accuracy: 0.8137
Epoch 12/50, Train Loss: 0.7900, Train Accuracy: 0.7978, Dev Accuracy: 0.7983
Epoch 13/50, Train Loss: 0.5404, Train Accuracy: 0.8041, Dev Accuracy: 0.8031
Epoch 14/50, Train Loss: 0.7373, Train Accuracy: 0.8419, Dev Accuracy: 0.8384
Epoch 15/50, Train Loss: 0.6097, Train Accuracy: 0.8116, Dev Accuracy: 0.8137
Epoch 16/50, Train Loss: 2.0310, Train Accuracy: 0.7608, Dev Accuracy: 0.7547
Epoch 17/50, Train Loss: 0.5048, Train Accuracy: 0.8576, Dev Accuracy: 0.8479
Epoch 18/50, Train Loss: 0.4645, Train Accuracy: 0.8596, Dev Accuracy: 0.8550
Epoch 19/50, Train Loss: 1.8805, Train Accuracy: 0.7415, Dev Accuracy: 0.7347
Epoch 20/50, Train Loss: 0.6586, Train Accuracy: 0.8360, Dev Accuracy: 0.8349
Epoch 21/50, Train Loss: 1.2055, Train Accuracy: 0.8308, Dev Accuracy: 0.8208
Epoch 22/50, Train Loss: 0.3086, Train Accuracy: 0.8670, Dev Accuracy: 0.8656
Epoch 23/50, Train Loss: 0.4957, Train Accuracy: 0.8702, Dev Accuracy: 0.8573
Epoch 24/50, Train Loss: 1.1060, Train Accuracy: 0.8320, Dev Accuracy: 0.8160
Epoch 25/50, Train Loss: 1.3113, Train Accuracy: 0.8308, Dev Accuracy: 0.8066
Epoch 26/50, Train Loss: 0.7811, Train Accuracy: 0.8505, Dev Accuracy: 0.8443
Epoch 27/50, Train Loss: 0.6761, Train Accuracy: 0.8757, Dev Accuracy: 0.8691
Epoch 28/50, Train Loss: 0.5585, Train Accuracy: 0.8544, Dev Accuracy: 0.8479
Epoch 29/50, Train Loss: 0.5659, Train Accuracy: 0.8820, Dev Accuracy: 0.8691
Epoch 30/50, Train Loss: 0.8611, Train Accuracy: 0.8293, Dev Accuracy: 0.8125
Epoch 31/50, Train Loss: 0.6248, Train Accuracy: 0.8509, Dev Accuracy: 0.8491
Epoch 32/50, Train Loss: 0.9830, Train Accuracy: 0.8438, Dev Accuracy: 0.8479
Epoch 33/50, Train Loss: 0.6630, Train Accuracy: 0.8525, Dev Accuracy: 0.8443
Epoch 34/50, Train Loss: 0.5949, Train Accuracy: 0.8501, Dev Accuracy: 0.8491
Epoch 35/50, Train Loss: 0.3742, Train Accuracy: 0.8800, Dev Accuracy: 0.8703
Epoch 36/50, Train Loss: 0.4591, Train Accuracy: 0.8619, Dev Accuracy: 0.8467
Epoch 37/50, Train Loss: 0.4808, Train Accuracy: 0.8285, Dev Accuracy: 0.8137
Epoch 38/50, Train Loss: 0.3475, Train Accuracy: 0.8584, Dev Accuracy: 0.8526
Epoch 39/50, Train Loss: 1.3753, Train Accuracy: 0.7891, Dev Accuracy: 0.7606
Epoch 40/50, Train Loss: 0.6399, Train Accuracy: 0.8560, Dev Accuracy: 0.8408
Epoch 41/50, Train Loss: 0.5183, Train Accuracy: 0.8560, Dev Accuracy: 0.8608
Epoch 42/50, Train Loss: 1.0500, Train Accuracy: 0.8639, Dev Accuracy: 0.8502
Epoch 43/50, Train Loss: 0.8047, Train Accuracy: 0.8682, Dev Accuracy: 0.8561
Epoch 44/50, Train Loss: 0.6796, Train Accuracy: 0.8513, Dev Accuracy: 0.8455
Epoch 45/50, Train Loss: 0.6205, Train Accuracy: 0.8706, Dev Accuracy: 0.8691
Epoch 46/50, Train Loss: 0.2211, Train Accuracy: 0.8851, Dev Accuracy: 0.8703
Epoch 47/50, Train Loss: 1.2657, Train Accuracy: 0.8662, Dev Accuracy: 0.8573
Epoch 48/50, Train Loss: 0.5114, Train Accuracy: 0.8954, Dev Accuracy: 0.8939
Epoch 49/50, Train Loss: 0.7127, Train Accuracy: 0.8682, Dev Accuracy: 0.8691
Epoch 50/50, Train Loss: 0.2078, Train Accuracy: 0.8659, Dev Accuracy: 0.8502
No description has been provided for this image
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.91      0.95       292
           1       0.74      0.81      0.77       110
           2       0.80      0.84      0.82       152
           3       0.96      0.89      0.92        73
           4       0.83      0.39      0.53        49
           5       0.74      0.99      0.85       148
           6       1.00      0.25      0.40        24

    accuracy                           0.85       848
   macro avg       0.86      0.73      0.75       848
weighted avg       0.86      0.85      0.84       848

F1-Score: 0.8424
Hidden layers configuration: [32, 16, 8], Accuracy: 0.8502358490566038
Epoch 1/50, Train Loss: 7.3063, Train Accuracy: 0.1459, Dev Accuracy: 0.1285
Epoch 2/50, Train Loss: 6.3056, Train Accuracy: 0.6743, Dev Accuracy: 0.6686
Epoch 3/50, Train Loss: 5.9427, Train Accuracy: 0.6998, Dev Accuracy: 0.6863
Epoch 4/50, Train Loss: 2.8393, Train Accuracy: 0.7065, Dev Accuracy: 0.7193
Epoch 5/50, Train Loss: 4.4799, Train Accuracy: 0.6448, Dev Accuracy: 0.6545
Epoch 6/50, Train Loss: 2.4085, Train Accuracy: 0.7557, Dev Accuracy: 0.7618
Epoch 7/50, Train Loss: 1.7595, Train Accuracy: 0.7958, Dev Accuracy: 0.8031
Epoch 8/50, Train Loss: 2.0559, Train Accuracy: 0.7911, Dev Accuracy: 0.7842
Epoch 9/50, Train Loss: 1.6194, Train Accuracy: 0.7876, Dev Accuracy: 0.7665
Epoch 10/50, Train Loss: 1.2184, Train Accuracy: 0.8438, Dev Accuracy: 0.8455
Epoch 11/50, Train Loss: 0.7580, Train Accuracy: 0.8249, Dev Accuracy: 0.8302
Epoch 12/50, Train Loss: 0.6332, Train Accuracy: 0.8131, Dev Accuracy: 0.7936
Epoch 13/50, Train Loss: 1.1190, Train Accuracy: 0.8281, Dev Accuracy: 0.8196
Epoch 14/50, Train Loss: 1.3169, Train Accuracy: 0.8509, Dev Accuracy: 0.8467
Epoch 15/50, Train Loss: 0.4554, Train Accuracy: 0.8415, Dev Accuracy: 0.8373
Epoch 16/50, Train Loss: 0.5193, Train Accuracy: 0.8552, Dev Accuracy: 0.8538
Epoch 17/50, Train Loss: 1.1831, Train Accuracy: 0.8316, Dev Accuracy: 0.8160
Epoch 18/50, Train Loss: 1.5159, Train Accuracy: 0.8509, Dev Accuracy: 0.8396
Epoch 19/50, Train Loss: 0.5843, Train Accuracy: 0.8635, Dev Accuracy: 0.8455
Epoch 20/50, Train Loss: 0.4796, Train Accuracy: 0.8682, Dev Accuracy: 0.8644
Epoch 21/50, Train Loss: 0.5932, Train Accuracy: 0.8371, Dev Accuracy: 0.8219
Epoch 22/50, Train Loss: 1.0192, Train Accuracy: 0.8619, Dev Accuracy: 0.8550
Epoch 23/50, Train Loss: 0.9314, Train Accuracy: 0.8501, Dev Accuracy: 0.8420
Epoch 24/50, Train Loss: 0.4399, Train Accuracy: 0.8564, Dev Accuracy: 0.8408
Epoch 25/50, Train Loss: 0.4603, Train Accuracy: 0.8887, Dev Accuracy: 0.8844
Epoch 26/50, Train Loss: 0.2975, Train Accuracy: 0.8615, Dev Accuracy: 0.8479
Epoch 27/50, Train Loss: 0.8075, Train Accuracy: 0.8588, Dev Accuracy: 0.8573
Epoch 28/50, Train Loss: 0.5097, Train Accuracy: 0.8529, Dev Accuracy: 0.8443
Epoch 29/50, Train Loss: 0.8670, Train Accuracy: 0.8792, Dev Accuracy: 0.8585
Epoch 30/50, Train Loss: 0.3063, Train Accuracy: 0.8643, Dev Accuracy: 0.8526
Epoch 31/50, Train Loss: 0.8306, Train Accuracy: 0.8816, Dev Accuracy: 0.8762
Epoch 32/50, Train Loss: 0.8839, Train Accuracy: 0.8611, Dev Accuracy: 0.8455
Epoch 33/50, Train Loss: 0.5090, Train Accuracy: 0.8647, Dev Accuracy: 0.8361
Epoch 34/50, Train Loss: 0.4229, Train Accuracy: 0.8721, Dev Accuracy: 0.8667
Epoch 35/50, Train Loss: 0.3537, Train Accuracy: 0.8769, Dev Accuracy: 0.8726
Epoch 36/50, Train Loss: 0.7091, Train Accuracy: 0.8839, Dev Accuracy: 0.8797
Epoch 37/50, Train Loss: 0.4488, Train Accuracy: 0.8670, Dev Accuracy: 0.8667
Epoch 38/50, Train Loss: 0.4762, Train Accuracy: 0.8729, Dev Accuracy: 0.8679
Epoch 39/50, Train Loss: 0.9091, Train Accuracy: 0.8725, Dev Accuracy: 0.8750
Epoch 40/50, Train Loss: 0.5778, Train Accuracy: 0.8792, Dev Accuracy: 0.8632
Epoch 41/50, Train Loss: 0.6472, Train Accuracy: 0.8682, Dev Accuracy: 0.8550
Epoch 42/50, Train Loss: 0.2584, Train Accuracy: 0.8698, Dev Accuracy: 0.8691
Epoch 43/50, Train Loss: 0.7754, Train Accuracy: 0.8792, Dev Accuracy: 0.8715
Epoch 44/50, Train Loss: 0.8772, Train Accuracy: 0.8702, Dev Accuracy: 0.8667
Epoch 45/50, Train Loss: 0.2729, Train Accuracy: 0.8686, Dev Accuracy: 0.8620
Epoch 46/50, Train Loss: 0.7849, Train Accuracy: 0.8887, Dev Accuracy: 0.8844
Epoch 47/50, Train Loss: 0.2453, Train Accuracy: 0.8741, Dev Accuracy: 0.8514
Epoch 48/50, Train Loss: 1.1743, Train Accuracy: 0.8607, Dev Accuracy: 0.8325
Epoch 49/50, Train Loss: 0.4111, Train Accuracy: 0.8584, Dev Accuracy: 0.8514
Epoch 50/50, Train Loss: 0.4506, Train Accuracy: 0.8765, Dev Accuracy: 0.8703
No description has been provided for this image
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.95      0.95       292
           1       0.73      0.85      0.79       110
           2       0.90      0.80      0.84       152
           3       0.96      0.89      0.92        73
           4       0.65      0.53      0.58        49
           5       0.85      0.91      0.88       148
           6       0.74      0.83      0.78        24

    accuracy                           0.87       848
   macro avg       0.83      0.82      0.82       848
weighted avg       0.87      0.87      0.87       848

F1-Score: 0.8697
Hidden layers configuration: [64, 32, 16, 8], Accuracy: 0.8702830188679245
Best model hidden layers configuration: Sequential(
  (0): BatchNorm1d(9, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (1): Linear(in_features=9, out_features=64, bias=True)
  (2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Linear(in_features=32, out_features=16, bias=True)
  (8): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (9): ReLU()
  (10): Linear(in_features=16, out_features=8, bias=True)
  (11): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (12): ReLU()
  (13): Linear(in_features=8, out_features=4238, bias=True)
), Best accuracy: 0.8702830188679245
Test Accuracy: 0.8856132075471698
In [ ]:
evaluate_ann(ann_model_ac_lab, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]
Out[ ]:
ANN
Metrics
Train Accuracy 88.12
F1 Macro 82.65
F1 Weighted 87.59
Recall Macro 80.43
Recall Weighted 88.12
Precision Macro 87.01
Precision Weighted 88.5
Test Accuracy 87.38
F1 Macro 81.12
F1 Weighted 86.96
Recall Macro 78.81
Recall Weighted 87.38
Precision Macro 85.23
Precision Weighted 87.7
In [ ]:
metric_to_show = 'Accuracy'       # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted

fig, ax = plt.subplots(figsize=(10, 6))

# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
result_ac_lab_hyper_tuned.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')

# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
result_ac_lab.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))

# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(result_ac_lab.columns.to_numpy())), result_ac_lab.columns.to_numpy(), rotation=90)
plt.show();
No description has been provided for this image
In [ ]:
model_names, models = [], []
for model_name in evaluator_ac_lab_hyper_tuned.model_names:
    model_names.append(model_name)
    models.append(evaluator_ac_lab_hyper_tuned.models[model_name].named_steps['classifier'])
plot_feature_importances(models, model_names, df_tmp.columns.to_numpy()[:-1])
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Objects ->

  • evaluator_ac_lab
  • evaluator_ac_lab_hyper_tuned

Results ->

  • result_ac_lab
  • result_ac_lab_hyper_tuned

AC Classroom ML Machine Learning Model¶

In [ ]:
# Preapring data for ML
df_tmp = new_df.query("`Room Type` == 1 and `Room Condition` == 1")[['CO2 (ppm)', 'PM1 (ug/m3)', 'PM2.5 (ug/m3)', 'PM10 (ug/m3)', 'Temperature (C)', 'Humidity (%)', 'Position', 'Room Condition', 'Room Type', 'Floor No.', 'Weather', 'Occupancy_Classified']].drop(["Room Condition", "Room Type"], axis = 1)
In [ ]:
# Split data into x and y
ac_classroom_x = df_tmp.drop('Occupancy_Classified', axis = 1)
ac_classroom_y = df_tmp['Occupancy_Classified'].values  # converting to numpy array

# Scaling input variables, output variables doens't required as we are just predicting discrete outcomes
ac_classroom_x = st_x.fit_transform(ac_classroom_x)
In [ ]:

In [ ]:
# ML Training without hyper tuned
evaluator_ac_classroom = original_ml_pipeline_obj(ac_classroom_x, ac_classroom_y, test_size = 0.4)
================================================
LogisticRegression model has started training
LogisticRegression model has ended training. Time -> 0.03s. Accuracy - > 73.10 %
================================================


================================================
KNN model has started training
KNN model has ended training. Time -> 0.0s. Accuracy - > 93.75 %
================================================


================================================
SVM model has started training
SVM model has ended training. Time -> 0.08s. Accuracy - > 83.35 %
================================================


================================================
Linear SVM model has started training
Linear SVM model has ended training. Time -> 0.11s. Accuracy - > 69.86 %
================================================


================================================
DecisionTree model has started training
DecisionTree model has ended training. Time -> 0.01s. Accuracy - > 97.66 %
================================================


================================================
RandomForest model has started training
RandomForest model has ended training. Time -> 4.18s. Accuracy - > 97.59 %
================================================


================================================
XGB model has started training
XGB model has ended training. Time -> 1.09s. Accuracy - > 97.51 %
================================================


================================================
LGBM model has started training
LGBM model has ended training. Time -> 4.4s. Accuracy - > 97.59 %
================================================


================================================
GradientBoosting model has started training
GradientBoosting model has ended training. Time -> 2.1s. Accuracy - > 97.81 %
================================================


================================================
MLP Neural Net model has started training
MLP Neural Net model has ended training. Time -> 2.65s. Accuracy - > 84.40 %
================================================


================================================
AdaBoost model has started training
AdaBoost model has ended training. Time -> 0.2s. Accuracy - > 50.26 %
================================================


================================================
Naive Bayes model has started training
Naive Bayes model has ended training. Time -> 0.0s. Accuracy - > 34.06 %
================================================


================================================
QDA model has started training
QDA model has ended training. Time -> 0.0s. Accuracy - > 2.19 %
================================================

In [ ]:
result_ac_classroom = evaluate_result(evaluator_ac_classroom)
result_ac_classroom
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA
Metrics
Train Accuracy 72.55 96.28 81.6 67.67 99.8 99.8 99.6 99.5 99.5 82.45 51.63 33.69 2.87
F1 Macro 64.14 95.59 72.96 48.36 99.73 99.73 99.6 99.43 99.27 72.35 39.43 31.88 0.93
F1 Weighted 70.28 96.27 79.18 64.7 99.8 99.8 99.6 99.5 99.49 79.88 40.88 22.22 0.16
Recall Macro 63.54 95.43 73.61 50.68 99.77 99.56 99.45 99.28 98.95 72.76 41.17 49.44 16.67
Recall Weighted 72.55 96.28 81.6 67.67 99.8 99.8 99.6 99.5 99.5 82.45 51.63 33.69 2.87
Precision Macro 66.48 95.76 88.99 62.64 99.69 99.9 99.75 99.58 99.61 72.5 53.73 45.86 0.48
Precision Weighted 68.57 96.27 83.33 65.35 99.8 99.8 99.6 99.5 99.5 77.74 46.8 61.21 0.08
Test Accuracy 73.1 93.75 83.35 69.86 97.66 97.59 97.51 97.59 97.81 84.4 50.26 34.06 2.19
F1 Macro 63.72 91.0 73.73 50.12 95.8 95.58 96.18 96.32 96.37 73.03 38.82 31.03 0.71
F1 Weighted 71.31 93.78 81.47 67.45 97.71 97.58 97.52 97.61 97.82 82.33 39.26 22.91 0.09
Recall Macro 62.02 90.87 73.63 50.75 96.99 95.05 96.52 96.55 96.23 72.65 40.38 48.55 16.67
Recall Weighted 73.1 93.75 83.35 69.86 97.66 97.59 97.51 97.59 97.81 84.4 50.26 34.06 2.19
Precision Macro 67.42 91.22 90.05 64.89 94.84 96.13 95.88 96.13 96.52 73.93 54.71 46.04 0.36
Precision Weighted 70.03 93.86 84.53 67.62 97.83 97.58 97.55 97.65 97.85 80.49 46.79 64.49 0.05
In [ ]:
metric_to_show = 'Accuracy'

ax = result_ac_classroom.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_ac_classroom.columns.to_numpy())), result_ac_classroom.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
evaluator_ac_classroom_hyper_tuned = hyper_tuned_ml_pipeline_obj(ac_classroom_x, ac_classroom_y, test_size = 0.4)
================================================
LogisticRegression tuned model has started training
Fitting 5 folds for each of 40 candidates, totalling 200 fits
LogisticRegression tuned model has ended training. Time -> 58.56s. Accuracy - > 75.96 %
================================================


================================================
KNN tuned model has started training
Fitting 5 folds for each of 20 candidates, totalling 100 fits
KNN tuned model has ended training. Time -> 2.58s. Accuracy - > 94.72 %
================================================


================================================
SVM tuned model has started training
Fitting 5 folds for each of 49 candidates, totalling 245 fits
SVM tuned model has ended training. Time -> 26.1s. Accuracy - > 94.80 %
================================================


================================================
Linear SVM tuned model has started training
Fitting 5 folds for each of 3 candidates, totalling 15 fits
Linear SVM tuned model has ended training. Time -> 2.35s. Accuracy - > 79.50 %
================================================


================================================
DecisionTree tuned model has started training
Fitting 5 folds for each of 4800 candidates, totalling 24000 fits
DecisionTree tuned model has ended training. Time -> 134.04s. Accuracy - > 97.06 %
================================================


================================================
RandomForest tuned model has started training
Fitting 5 folds for each of 10 candidates, totalling 50 fits
RandomForest tuned model has ended training. Time -> 162.94s. Accuracy - > 97.59 %
================================================


================================================
XGB tuned model has started training
Fitting 5 folds for each of 150 candidates, totalling 750 fits
XGB tuned model has ended training. Time -> 161.99s. Accuracy - > 97.59 %
================================================


================================================
LGBM tuned model has started training
Fitting 5 folds for each of 150 candidates, totalling 750 fits
LGBM tuned model has ended training. Time -> 175.38s. Accuracy - > 97.51 %
================================================


================================================
GradientBoosting tuned model has started training
Fitting 5 folds for each of 3 candidates, totalling 15 fits
GradientBoosting tuned model has ended training. Time -> 34.87s. Accuracy - > 97.51 %
================================================


================================================
MLP Neural Net tuned model has started training
Fitting 5 folds for each of 50 candidates, totalling 250 fits
MLP Neural Net tuned model has ended training. Time -> 726.53s. Accuracy - > 96.01 %
================================================


================================================
AdaBoost tuned model has started training
Fitting 5 folds for each of 20 candidates, totalling 100 fits
AdaBoost tuned model has ended training. Time -> 55.54s. Accuracy - > 56.52 %
================================================


================================================
Naive Bayes tuned model has started training
Fitting 5 folds for each of 100 candidates, totalling 500 fits
Naive Bayes tuned model has ended training. Time -> 3.12s. Accuracy - > 57.27 %
================================================


================================================
QDA tuned model has started training
Fitting 5 folds for each of 5 candidates, totalling 25 fits
QDA tuned model has ended training. Time -> 0.29s. Accuracy - > 76.79 %
================================================

In [ ]:
result_ac_classroom_hyper_tuned = evaluate_result(evaluator_ac_classroom_hyper_tuned)
result_ac_classroom_hyper_tuned
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA
Metrics
Train Accuracy 75.87 97.23 94.87 80.19 99.04 99.35 99.6 99.3 99.8 98.54 52.99 54.4 72.75
F1 Macro 72.98 96.43 92.7 78.53 98.47 99.09 99.6 99.1 99.73 98.15 39.83 52.32 71.13
F1 Weighted 74.38 97.22 94.69 79.81 99.05 99.34 99.6 99.29 99.8 98.54 48.06 49.99 73.09
Recall Macro 72.29 96.27 91.5 77.61 98.37 98.79 99.45 98.95 99.66 98.11 42.47 57.79 72.7
Recall Weighted 75.87 97.23 94.87 80.19 99.04 99.35 99.6 99.3 99.8 98.54 52.99 54.4 72.75
Precision Macro 84.69 96.61 94.64 80.14 98.57 99.4 99.75 99.25 99.8 98.19 46.55 56.94 72.67
Precision Weighted 77.22 97.22 94.82 79.73 99.05 99.34 99.6 99.29 99.8 98.55 54.57 58.54 77.45
Test Accuracy 75.96 94.72 94.8 79.5 97.06 97.59 97.59 97.51 97.51 96.01 56.52 57.27 76.79
F1 Macro 73.3 91.94 92.45 75.84 95.02 96.06 96.26 96.07 95.86 94.25 42.46 53.0 74.12
F1 Weighted 74.89 94.75 94.73 79.38 97.11 97.58 97.61 97.52 97.54 96.04 52.01 54.01 77.68
Recall Macro 71.63 92.23 90.96 74.18 95.17 95.26 96.77 95.75 96.07 94.66 44.66 58.64 75.93
Recall Weighted 75.96 94.72 94.8 79.5 97.06 97.59 97.59 97.51 97.51 96.01 56.52 57.27 76.79
Precision Macro 84.57 91.74 94.26 77.79 95.06 96.91 95.8 96.41 95.7 93.92 50.62 57.86 75.49
Precision Weighted 76.83 94.82 94.77 79.44 97.22 97.59 97.64 97.54 97.59 96.11 59.99 64.08 82.63
In [ ]:
metric_to_show = 'Accuracy'

result_ac_classroom_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_ac_classroom_hyper_tuned.columns.to_numpy())), result_ac_classroom_hyper_tuned.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
metric_to_show = 'Accuracy'       # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted

fig, ax = plt.subplots(figsize=(10, 6))

# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
result_ac_classroom_hyper_tuned.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')

# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
result_ac_classroom.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))

# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(result_ac_classroom.columns.to_numpy())), result_ac_classroom.columns.to_numpy(), rotation=90)
plt.show();
No description has been provided for this image
In [ ]:
model_names, models = [], []
for model_name in evaluator_ac_classroom_hyper_tuned.model_names:
    model_names.append(model_name)
    models.append(evaluator_ac_classroom_hyper_tuned.models[model_name].named_steps['classifier'])
plot_feature_importances(models, model_names, df_tmp.columns.to_numpy()[:-1])
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
ann_model_ac_classroom = ann_model(ac_classroom_x, ac_classroom_y, test_size = 0.4)
Epoch 1/50, Train Loss: 7.3231, Train Accuracy: 0.0000, Dev Accuracy: 0.0000
Epoch 2/50, Train Loss: 6.8273, Train Accuracy: 0.3927, Dev Accuracy: 0.3967
Epoch 3/50, Train Loss: 5.8184, Train Accuracy: 0.4414, Dev Accuracy: 0.4465
Epoch 4/50, Train Loss: 6.6608, Train Accuracy: 0.4520, Dev Accuracy: 0.4540
Epoch 5/50, Train Loss: 4.1816, Train Accuracy: 0.4706, Dev Accuracy: 0.4630
Epoch 6/50, Train Loss: 5.1708, Train Accuracy: 0.4741, Dev Accuracy: 0.4691
Epoch 7/50, Train Loss: 5.4508, Train Accuracy: 0.5767, Dev Accuracy: 0.5641
Epoch 8/50, Train Loss: 1.8186, Train Accuracy: 0.5918, Dev Accuracy: 0.5686
Epoch 9/50, Train Loss: 1.1433, Train Accuracy: 0.6687, Dev Accuracy: 0.6742
Epoch 10/50, Train Loss: 0.7894, Train Accuracy: 0.6973, Dev Accuracy: 0.6938
Epoch 11/50, Train Loss: 1.1286, Train Accuracy: 0.6516, Dev Accuracy: 0.6591
Epoch 12/50, Train Loss: 3.3833, Train Accuracy: 0.7220, Dev Accuracy: 0.7360
Epoch 13/50, Train Loss: 3.3087, Train Accuracy: 0.7134, Dev Accuracy: 0.7376
Epoch 14/50, Train Loss: 1.1667, Train Accuracy: 0.7074, Dev Accuracy: 0.7074
Epoch 15/50, Train Loss: 2.4644, Train Accuracy: 0.7667, Dev Accuracy: 0.7888
Epoch 16/50, Train Loss: 2.9475, Train Accuracy: 0.7728, Dev Accuracy: 0.7858
Epoch 17/50, Train Loss: 2.7459, Train Accuracy: 0.7345, Dev Accuracy: 0.7572
Epoch 18/50, Train Loss: 1.2099, Train Accuracy: 0.7944, Dev Accuracy: 0.8054
Epoch 19/50, Train Loss: 1.7360, Train Accuracy: 0.6541, Dev Accuracy: 0.6440
Epoch 20/50, Train Loss: 1.8859, Train Accuracy: 0.7858, Dev Accuracy: 0.8009
Epoch 21/50, Train Loss: 0.7349, Train Accuracy: 0.6556, Dev Accuracy: 0.6848
Epoch 22/50, Train Loss: 1.5846, Train Accuracy: 0.7702, Dev Accuracy: 0.7843
Epoch 23/50, Train Loss: 1.0822, Train Accuracy: 0.7944, Dev Accuracy: 0.8100
Epoch 24/50, Train Loss: 1.6786, Train Accuracy: 0.7144, Dev Accuracy: 0.7345
Epoch 25/50, Train Loss: 0.9919, Train Accuracy: 0.7567, Dev Accuracy: 0.7647
Epoch 26/50, Train Loss: 0.4078, Train Accuracy: 0.8064, Dev Accuracy: 0.8220
Epoch 27/50, Train Loss: 3.7707, Train Accuracy: 0.6385, Dev Accuracy: 0.6109
Epoch 28/50, Train Loss: 0.6523, Train Accuracy: 0.7919, Dev Accuracy: 0.8205
Epoch 29/50, Train Loss: 1.3765, Train Accuracy: 0.7717, Dev Accuracy: 0.7843
Epoch 30/50, Train Loss: 2.3608, Train Accuracy: 0.8074, Dev Accuracy: 0.8250
Epoch 31/50, Train Loss: 2.2627, Train Accuracy: 0.7898, Dev Accuracy: 0.8054
Epoch 32/50, Train Loss: 0.7710, Train Accuracy: 0.8175, Dev Accuracy: 0.8235
Epoch 33/50, Train Loss: 0.8699, Train Accuracy: 0.7883, Dev Accuracy: 0.8115
Epoch 34/50, Train Loss: 0.7134, Train Accuracy: 0.7969, Dev Accuracy: 0.8190
Epoch 35/50, Train Loss: 1.3125, Train Accuracy: 0.6787, Dev Accuracy: 0.6998
Epoch 36/50, Train Loss: 1.6866, Train Accuracy: 0.7823, Dev Accuracy: 0.7964
Epoch 37/50, Train Loss: 1.0597, Train Accuracy: 0.8195, Dev Accuracy: 0.8326
Epoch 38/50, Train Loss: 1.3165, Train Accuracy: 0.8110, Dev Accuracy: 0.8190
Epoch 39/50, Train Loss: 0.8309, Train Accuracy: 0.8039, Dev Accuracy: 0.8205
Epoch 40/50, Train Loss: 0.7335, Train Accuracy: 0.8009, Dev Accuracy: 0.8024
Epoch 41/50, Train Loss: 0.1936, Train Accuracy: 0.8074, Dev Accuracy: 0.8160
Epoch 42/50, Train Loss: 1.4569, Train Accuracy: 0.7964, Dev Accuracy: 0.8160
Epoch 43/50, Train Loss: 2.3033, Train Accuracy: 0.7712, Dev Accuracy: 0.7677
Epoch 44/50, Train Loss: 1.9026, Train Accuracy: 0.7144, Dev Accuracy: 0.7360
Epoch 45/50, Train Loss: 2.6893, Train Accuracy: 0.7506, Dev Accuracy: 0.7511
Epoch 46/50, Train Loss: 1.7714, Train Accuracy: 0.8084, Dev Accuracy: 0.8160
Epoch 47/50, Train Loss: 2.6553, Train Accuracy: 0.7778, Dev Accuracy: 0.7798
Epoch 48/50, Train Loss: 2.0022, Train Accuracy: 0.8220, Dev Accuracy: 0.8371
Epoch 49/50, Train Loss: 1.0888, Train Accuracy: 0.8160, Dev Accuracy: 0.8386
Epoch 50/50, Train Loss: 4.3663, Train Accuracy: 0.7949, Dev Accuracy: 0.8175
No description has been provided for this image
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.92      0.92        13
           1       1.00      0.88      0.93        80
           2       0.33      0.03      0.06        32
           3       0.66      0.94      0.78       149
           4       0.87      0.85      0.86       265
           5       0.87      0.75      0.81       124

    accuracy                           0.82       663
   macro avg       0.78      0.73      0.73       663
weighted avg       0.82      0.82      0.80       663

F1-Score: 0.8036
Hidden layers configuration: [16, 8], Accuracy: 0.8174962292609351
Epoch 1/50, Train Loss: 7.5274, Train Accuracy: 0.1679, Dev Accuracy: 0.1312
Epoch 2/50, Train Loss: 7.5536, Train Accuracy: 0.3881, Dev Accuracy: 0.3635
Epoch 3/50, Train Loss: 6.8964, Train Accuracy: 0.6310, Dev Accuracy: 0.6591
Epoch 4/50, Train Loss: 5.9499, Train Accuracy: 0.6838, Dev Accuracy: 0.7044
Epoch 5/50, Train Loss: 5.3552, Train Accuracy: 0.6375, Dev Accuracy: 0.6621
Epoch 6/50, Train Loss: 3.2717, Train Accuracy: 0.6687, Dev Accuracy: 0.6998
Epoch 7/50, Train Loss: 3.5244, Train Accuracy: 0.6556, Dev Accuracy: 0.6802
Epoch 8/50, Train Loss: 3.8065, Train Accuracy: 0.7079, Dev Accuracy: 0.7315
Epoch 9/50, Train Loss: 3.8193, Train Accuracy: 0.7210, Dev Accuracy: 0.7406
Epoch 10/50, Train Loss: 1.2348, Train Accuracy: 0.7270, Dev Accuracy: 0.7617
Epoch 11/50, Train Loss: 1.3554, Train Accuracy: 0.7778, Dev Accuracy: 0.8069
Epoch 12/50, Train Loss: 0.9241, Train Accuracy: 0.7753, Dev Accuracy: 0.7888
Epoch 13/50, Train Loss: 2.0453, Train Accuracy: 0.6571, Dev Accuracy: 0.6471
Epoch 14/50, Train Loss: 2.5873, Train Accuracy: 0.7350, Dev Accuracy: 0.7647
Epoch 15/50, Train Loss: 2.6264, Train Accuracy: 0.6647, Dev Accuracy: 0.6546
Epoch 16/50, Train Loss: 2.7725, Train Accuracy: 0.6792, Dev Accuracy: 0.6998
Epoch 17/50, Train Loss: 1.4903, Train Accuracy: 0.7722, Dev Accuracy: 0.7934
Epoch 18/50, Train Loss: 0.9666, Train Accuracy: 0.6933, Dev Accuracy: 0.7149
Epoch 19/50, Train Loss: 2.0856, Train Accuracy: 0.7717, Dev Accuracy: 0.7949
Epoch 20/50, Train Loss: 1.2760, Train Accuracy: 0.7396, Dev Accuracy: 0.7662
Epoch 21/50, Train Loss: 0.4466, Train Accuracy: 0.7089, Dev Accuracy: 0.7376
Epoch 22/50, Train Loss: 2.6181, Train Accuracy: 0.5269, Dev Accuracy: 0.5309
Epoch 23/50, Train Loss: 2.8069, Train Accuracy: 0.7893, Dev Accuracy: 0.8175
Epoch 24/50, Train Loss: 2.5516, Train Accuracy: 0.7119, Dev Accuracy: 0.7149
Epoch 25/50, Train Loss: 1.6214, Train Accuracy: 0.6360, Dev Accuracy: 0.6531
Epoch 26/50, Train Loss: 1.3110, Train Accuracy: 0.7964, Dev Accuracy: 0.8205
Epoch 27/50, Train Loss: 0.3744, Train Accuracy: 0.7853, Dev Accuracy: 0.7994
Epoch 28/50, Train Loss: 1.0992, Train Accuracy: 0.7813, Dev Accuracy: 0.7707
Epoch 29/50, Train Loss: 1.7797, Train Accuracy: 0.6450, Dev Accuracy: 0.6787
Epoch 30/50, Train Loss: 1.3964, Train Accuracy: 0.7954, Dev Accuracy: 0.8281
Epoch 31/50, Train Loss: 1.7679, Train Accuracy: 0.7984, Dev Accuracy: 0.8039
Epoch 32/50, Train Loss: 1.1720, Train Accuracy: 0.7929, Dev Accuracy: 0.7873
Epoch 33/50, Train Loss: 1.6450, Train Accuracy: 0.7622, Dev Accuracy: 0.7421
Epoch 34/50, Train Loss: 1.5500, Train Accuracy: 0.7959, Dev Accuracy: 0.8145
Epoch 35/50, Train Loss: 0.7301, Train Accuracy: 0.7763, Dev Accuracy: 0.7858
Epoch 36/50, Train Loss: 2.4106, Train Accuracy: 0.7994, Dev Accuracy: 0.8100
Epoch 37/50, Train Loss: 2.5638, Train Accuracy: 0.7547, Dev Accuracy: 0.7315
Epoch 38/50, Train Loss: 1.1289, Train Accuracy: 0.7803, Dev Accuracy: 0.7813
Epoch 39/50, Train Loss: 1.0550, Train Accuracy: 0.8170, Dev Accuracy: 0.8190
Epoch 40/50, Train Loss: 0.2158, Train Accuracy: 0.8245, Dev Accuracy: 0.8416
Epoch 41/50, Train Loss: 3.4405, Train Accuracy: 0.6933, Dev Accuracy: 0.6968
Epoch 42/50, Train Loss: 2.3037, Train Accuracy: 0.8064, Dev Accuracy: 0.8235
Epoch 43/50, Train Loss: 1.4047, Train Accuracy: 0.7712, Dev Accuracy: 0.7888
Epoch 44/50, Train Loss: 1.5982, Train Accuracy: 0.7773, Dev Accuracy: 0.7919
Epoch 45/50, Train Loss: 1.3713, Train Accuracy: 0.8301, Dev Accuracy: 0.8477
Epoch 46/50, Train Loss: 4.0226, Train Accuracy: 0.7677, Dev Accuracy: 0.7828
Epoch 47/50, Train Loss: 4.0349, Train Accuracy: 0.8009, Dev Accuracy: 0.8069
Epoch 48/50, Train Loss: 1.4389, Train Accuracy: 0.8089, Dev Accuracy: 0.8205
Epoch 49/50, Train Loss: 1.2250, Train Accuracy: 0.7838, Dev Accuracy: 0.7934
Epoch 50/50, Train Loss: 0.1714, Train Accuracy: 0.8180, Dev Accuracy: 0.8235
No description has been provided for this image
Classification Report:
              precision    recall  f1-score   support

           0       0.39      0.92      0.55        13
           1       1.00      0.70      0.82        80
           2       0.86      0.19      0.31        32
           3       0.80      0.88      0.84       149
           4       0.85      0.92      0.88       265
           5       0.82      0.79      0.81       124

    accuracy                           0.82       663
   macro avg       0.79      0.73      0.70       663
weighted avg       0.84      0.82      0.82       663

F1-Score: 0.8164
Hidden layers configuration: [32, 16, 8], Accuracy: 0.8235294117647058
Epoch 1/50, Train Loss: 7.3086, Train Accuracy: 0.2700, Dev Accuracy: 0.2986
Epoch 2/50, Train Loss: 6.5482, Train Accuracy: 0.5043, Dev Accuracy: 0.5204
Epoch 3/50, Train Loss: 5.2745, Train Accuracy: 0.5380, Dev Accuracy: 0.5596
Epoch 4/50, Train Loss: 4.0197, Train Accuracy: 0.5691, Dev Accuracy: 0.5928
Epoch 5/50, Train Loss: 4.3301, Train Accuracy: 0.5259, Dev Accuracy: 0.5324
Epoch 6/50, Train Loss: 4.5292, Train Accuracy: 0.6269, Dev Accuracy: 0.6380
Epoch 7/50, Train Loss: 0.8850, Train Accuracy: 0.7471, Dev Accuracy: 0.7692
Epoch 8/50, Train Loss: 1.2019, Train Accuracy: 0.7883, Dev Accuracy: 0.8009
Epoch 9/50, Train Loss: 0.5390, Train Accuracy: 0.7743, Dev Accuracy: 0.7934
Epoch 10/50, Train Loss: 0.8082, Train Accuracy: 0.7949, Dev Accuracy: 0.8145
Epoch 11/50, Train Loss: 1.4296, Train Accuracy: 0.6948, Dev Accuracy: 0.7270
Epoch 12/50, Train Loss: 1.2200, Train Accuracy: 0.7677, Dev Accuracy: 0.7722
Epoch 13/50, Train Loss: 0.6294, Train Accuracy: 0.7476, Dev Accuracy: 0.7662
Epoch 14/50, Train Loss: 1.5068, Train Accuracy: 0.7692, Dev Accuracy: 0.7843
Epoch 15/50, Train Loss: 2.7010, Train Accuracy: 0.6787, Dev Accuracy: 0.7044
Epoch 16/50, Train Loss: 1.1173, Train Accuracy: 0.7763, Dev Accuracy: 0.7994
Epoch 17/50, Train Loss: 0.6024, Train Accuracy: 0.7783, Dev Accuracy: 0.8039
Epoch 18/50, Train Loss: 1.0757, Train Accuracy: 0.5234, Dev Accuracy: 0.4962
Epoch 19/50, Train Loss: 1.7490, Train Accuracy: 0.6491, Dev Accuracy: 0.6757
Epoch 20/50, Train Loss: 1.1336, Train Accuracy: 0.6350, Dev Accuracy: 0.6440
Epoch 21/50, Train Loss: 2.2274, Train Accuracy: 0.7717, Dev Accuracy: 0.7888
Epoch 22/50, Train Loss: 2.3650, Train Accuracy: 0.8276, Dev Accuracy: 0.8296
Epoch 23/50, Train Loss: 1.1242, Train Accuracy: 0.7763, Dev Accuracy: 0.7753
Epoch 24/50, Train Loss: 0.3762, Train Accuracy: 0.8100, Dev Accuracy: 0.8235
Epoch 25/50, Train Loss: 4.2830, Train Accuracy: 0.7768, Dev Accuracy: 0.7828
Epoch 26/50, Train Loss: 0.2204, Train Accuracy: 0.8311, Dev Accuracy: 0.8431
Epoch 27/50, Train Loss: 0.8347, Train Accuracy: 0.6817, Dev Accuracy: 0.6953
Epoch 28/50, Train Loss: 0.9250, Train Accuracy: 0.7587, Dev Accuracy: 0.7632
Epoch 29/50, Train Loss: 3.1317, Train Accuracy: 0.8215, Dev Accuracy: 0.8265
Epoch 30/50, Train Loss: 1.2892, Train Accuracy: 0.5847, Dev Accuracy: 0.5822
Epoch 31/50, Train Loss: 2.4246, Train Accuracy: 0.7863, Dev Accuracy: 0.7798
Epoch 32/50, Train Loss: 1.1728, Train Accuracy: 0.7547, Dev Accuracy: 0.7738
Epoch 33/50, Train Loss: 0.6297, Train Accuracy: 0.7999, Dev Accuracy: 0.8250
Epoch 34/50, Train Loss: 1.7944, Train Accuracy: 0.8205, Dev Accuracy: 0.8250
Epoch 35/50, Train Loss: 1.4668, Train Accuracy: 0.7280, Dev Accuracy: 0.7451
Epoch 36/50, Train Loss: 2.5585, Train Accuracy: 0.7577, Dev Accuracy: 0.7738
Epoch 37/50, Train Loss: 1.4770, Train Accuracy: 0.7577, Dev Accuracy: 0.7707
Epoch 38/50, Train Loss: 1.3612, Train Accuracy: 0.8205, Dev Accuracy: 0.8416
Epoch 39/50, Train Loss: 1.1249, Train Accuracy: 0.8200, Dev Accuracy: 0.8401
Epoch 40/50, Train Loss: 1.3318, Train Accuracy: 0.8135, Dev Accuracy: 0.8326
Epoch 41/50, Train Loss: 0.6945, Train Accuracy: 0.8019, Dev Accuracy: 0.7964
Epoch 42/50, Train Loss: 0.9786, Train Accuracy: 0.8617, Dev Accuracy: 0.8627
Epoch 43/50, Train Loss: 2.1722, Train Accuracy: 0.8135, Dev Accuracy: 0.8356
Epoch 44/50, Train Loss: 0.8308, Train Accuracy: 0.7773, Dev Accuracy: 0.8115
Epoch 45/50, Train Loss: 1.3235, Train Accuracy: 0.7421, Dev Accuracy: 0.7647
Epoch 46/50, Train Loss: 1.4209, Train Accuracy: 0.8351, Dev Accuracy: 0.8311
Epoch 47/50, Train Loss: 1.0057, Train Accuracy: 0.8100, Dev Accuracy: 0.8100
Epoch 48/50, Train Loss: 1.8800, Train Accuracy: 0.7853, Dev Accuracy: 0.7617
Epoch 49/50, Train Loss: 1.0815, Train Accuracy: 0.8079, Dev Accuracy: 0.8235
Epoch 50/50, Train Loss: 3.3087, Train Accuracy: 0.7788, Dev Accuracy: 0.7919
No description has been provided for this image
Classification Report:
              precision    recall  f1-score   support

           0       0.23      0.92      0.37        13
           1       1.00      0.44      0.61        80
           2       0.58      0.44      0.50        32
           3       0.77      0.91      0.83       149
           4       0.91      0.83      0.87       265
           5       0.81      0.88      0.84       124

    accuracy                           0.79       663
   macro avg       0.72      0.74      0.67       663
weighted avg       0.84      0.79      0.80       663

F1-Score: 0.7964
Hidden layers configuration: [64, 32, 16, 8], Accuracy: 0.7918552036199095
Best model hidden layers configuration: Sequential(
  (0): BatchNorm1d(9, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (1): Linear(in_features=9, out_features=32, bias=True)
  (2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): ReLU()
  (4): Linear(in_features=32, out_features=16, bias=True)
  (5): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Linear(in_features=16, out_features=8, bias=True)
  (8): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (9): ReLU()
  (10): Linear(in_features=8, out_features=3316, bias=True)
), Best accuracy: 0.8235294117647058
Test Accuracy: 0.8418674698795181
In [ ]:
evaluate_ann(ann_model_ac_classroom, *train_test_split(ac_classroom_x, ac_classroom_y, test_size=0.4, random_state=42))[1]
Out[ ]:
ANN
Metrics
Train Accuracy 81.8
F1 Macro 72.38
F1 Weighted 80.58
Recall Macro 74.18
Recall Weighted 81.8
Precision Macro 78.85
Precision Weighted 82.45
Test Accuracy 83.27
F1 Macro 71.13
F1 Weighted 82.46
Recall Macro 73.79
Recall Weighted 83.27
Precision Macro 79.1
Precision Weighted 84.73

Objects ->

  • evaluator_ac_classroom
  • evaluator_ac_classroom_hyper_tuned

Results ->

  • result_ac_classroom
  • result_ac_classroom_hyper_tuned

Non-AC Classroom ML Machine Learning Model¶

In [ ]:
# Preapring data for ML
df_tmp = new_df.query("`Room Type` == 1 and `Room Condition` == 2")[['CO2 (ppm)', 'PM1 (ug/m3)', 'PM2.5 (ug/m3)', 'PM10 (ug/m3)', 'Temperature (C)', 'Humidity (%)', 'Position', 'Room Condition', 'Room Type', 'Floor No.', 'Weather', 'Occupancy_Classified']].drop(["Room Condition", "Room Type"], axis = 1)
In [ ]:
# Split data into x and y
non_ac_classroom_x = df_tmp.drop('Occupancy_Classified', axis = 1)
non_ac_classroom_y = df_tmp['Occupancy_Classified'].values  # converting to numpy array

# Scaling input variables, output variables doens't required as we are just predicting discrete outcomes
non_ac_classroom_x = st_x.fit_transform(non_ac_classroom_x)
In [ ]:
# ML Training without hyper tuned
evaluator_non_ac_classroom = original_ml_pipeline_obj(non_ac_classroom_x, non_ac_classroom_y, test_size = 0.4)
================================================
LogisticRegression model has started training
LogisticRegression model has ended training. Time -> 0.02s. Accuracy - > 76.20 %
================================================


================================================
KNN model has started training
KNN model has ended training. Time -> 0.0s. Accuracy - > 96.31 %
================================================


================================================
SVM model has started training
SVM model has ended training. Time -> 0.02s. Accuracy - > 79.34 %
================================================


================================================
Linear SVM model has started training
Linear SVM model has ended training. Time -> 0.03s. Accuracy - > 71.59 %
================================================


================================================
DecisionTree model has started training
DecisionTree model has ended training. Time -> 0.0s. Accuracy - > 96.13 %
================================================


================================================
RandomForest model has started training
RandomForest model has ended training. Time -> 3.07s. Accuracy - > 97.23 %
================================================


================================================
XGB model has started training
XGB model has ended training. Time -> 0.97s. Accuracy - > 97.60 %
================================================


================================================
LGBM model has started training
LGBM model has ended training. Time -> 2.57s. Accuracy - > 97.60 %
================================================


================================================
GradientBoosting model has started training
GradientBoosting model has ended training. Time -> 1.13s. Accuracy - > 96.31 %
================================================


================================================
MLP Neural Net model has started training
MLP Neural Net model has ended training. Time -> 1.15s. Accuracy - > 75.83 %
================================================


================================================
AdaBoost model has started training
AdaBoost model has ended training. Time -> 0.15s. Accuracy - > 68.27 %
================================================


================================================
Naive Bayes model has started training
Naive Bayes model has ended training. Time -> 0.0s. Accuracy - > 82.29 %
================================================


================================================
QDA model has started training
QDA model has ended training. Time -> 0.0s. Accuracy - > 71.03 %
================================================

In [ ]:
result_non_ac_classroom = evaluate_result(evaluator_non_ac_classroom)
result_non_ac_classroom
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA
Metrics
Train Accuracy 77.07 97.53 82.12 71.89 100.0 100.0 100.0 100.0 100.0 77.07 69.54 81.13 72.38
F1 Macro 55.99 96.07 68.99 41.59 100.0 100.0 100.0 100.0 100.0 53.1 63.58 75.12 56.02
F1 Weighted 72.66 97.54 80.4 64.24 100.0 100.0 100.0 100.0 100.0 71.31 70.84 82.33 67.14
Recall Macro 58.56 96.28 71.11 47.77 100.0 100.0 100.0 100.0 100.0 58.03 63.04 78.52 58.6
Recall Weighted 77.07 97.53 82.12 71.89 100.0 100.0 100.0 100.0 100.0 77.07 69.54 81.13 72.38
Precision Macro 60.52 95.87 72.52 54.42 100.0 100.0 100.0 100.0 100.0 49.25 71.41 78.15 65.99
Precision Weighted 72.39 97.55 81.89 68.01 100.0 100.0 100.0 100.0 100.0 66.75 78.37 88.87 74.67
Test Accuracy 76.2 96.31 79.34 71.59 96.13 97.23 97.6 97.6 96.31 75.83 68.27 82.29 71.03
F1 Macro 55.9 94.21 66.44 42.59 93.8 95.39 96.32 96.12 94.49 53.3 61.31 77.98 57.81
F1 Weighted 70.65 96.23 77.14 62.6 96.12 97.22 97.61 97.6 96.31 69.19 68.99 83.54 66.13
Recall Macro 59.84 93.87 68.1 48.84 93.85 95.13 96.08 95.78 94.53 59.38 59.77 80.44 60.95
Recall Weighted 76.2 96.31 79.34 71.59 96.13 97.23 97.6 97.6 96.31 75.83 68.27 82.29 71.03
Precision Macro 61.06 94.76 69.4 54.63 93.78 95.66 96.6 96.48 94.48 48.88 68.43 80.5 65.16
Precision Weighted 71.09 96.29 77.85 64.59 96.12 97.21 97.63 97.61 96.32 64.09 73.85 89.17 73.0
In [ ]:
metric_to_show = 'Accuracy'

ax = result_non_ac_classroom.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_non_ac_classroom.columns.to_numpy())), result_non_ac_classroom.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
evaluator_non_ac_classroom_hyper_tuned = hyper_tuned_ml_pipeline_obj(non_ac_classroom_x, non_ac_classroom_y, test_size = 0.4)
================================================
LogisticRegression tuned model has started training
Fitting 5 folds for each of 40 candidates, totalling 200 fits
LogisticRegression tuned model has ended training. Time -> 19.26s. Accuracy - > 80.81 %
================================================


================================================
KNN tuned model has started training
Fitting 5 folds for each of 20 candidates, totalling 100 fits
KNN tuned model has ended training. Time -> 1.33s. Accuracy - > 95.20 %
================================================


================================================
SVM tuned model has started training
Fitting 5 folds for each of 49 candidates, totalling 245 fits
SVM tuned model has ended training. Time -> 3.81s. Accuracy - > 95.57 %
================================================


================================================
Linear SVM tuned model has started training
Fitting 5 folds for each of 3 candidates, totalling 15 fits
Linear SVM tuned model has ended training. Time -> 0.25s. Accuracy - > 83.39 %
================================================


================================================
DecisionTree tuned model has started training
Fitting 5 folds for each of 4800 candidates, totalling 24000 fits
DecisionTree tuned model has ended training. Time -> 103.19s. Accuracy - > 96.13 %
================================================


================================================
RandomForest tuned model has started training
Fitting 5 folds for each of 10 candidates, totalling 50 fits
RandomForest tuned model has ended training. Time -> 171.82s. Accuracy - > 97.42 %
================================================


================================================
XGB tuned model has started training
Fitting 5 folds for each of 150 candidates, totalling 750 fits
XGB tuned model has ended training. Time -> 85.58s. Accuracy - > 97.23 %
================================================


================================================
LGBM tuned model has started training
Fitting 5 folds for each of 150 candidates, totalling 750 fits
LGBM tuned model has ended training. Time -> 93.3s. Accuracy - > 97.60 %
================================================


================================================
GradientBoosting tuned model has started training
Fitting 5 folds for each of 3 candidates, totalling 15 fits
GradientBoosting tuned model has ended training. Time -> 17.3s. Accuracy - > 96.49 %
================================================


================================================
MLP Neural Net tuned model has started training
Fitting 5 folds for each of 50 candidates, totalling 250 fits
MLP Neural Net tuned model has ended training. Time -> 311.3s. Accuracy - > 96.86 %
================================================


================================================
AdaBoost tuned model has started training
Fitting 5 folds for each of 20 candidates, totalling 100 fits
AdaBoost tuned model has ended training. Time -> 41.46s. Accuracy - > 64.76 %
================================================


================================================
Naive Bayes tuned model has started training
Fitting 5 folds for each of 100 candidates, totalling 500 fits
Naive Bayes tuned model has ended training. Time -> 1.65s. Accuracy - > 84.50 %
================================================


================================================
QDA tuned model has started training
Fitting 5 folds for each of 5 candidates, totalling 25 fits
QDA tuned model has ended training. Time -> 0.14s. Accuracy - > 73.80 %
================================================

In [ ]:
result_non_ac_classroom_hyper_tuned = evaluate_result(evaluator_non_ac_classroom_hyper_tuned)
result_non_ac_classroom_hyper_tuned
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA
Metrics
Train Accuracy 83.85 100.0 97.78 84.96 99.51 100.0 100.0 100.0 100.0 99.51 67.45 83.48 75.59
F1 Macro 71.88 100.0 95.99 71.27 99.13 100.0 100.0 100.0 100.0 98.89 55.33 77.79 59.82
F1 Weighted 82.38 100.0 97.78 82.6 99.51 100.0 100.0 100.0 100.0 99.51 66.42 84.76 73.27
Recall Macro 70.01 100.0 95.95 71.08 99.01 100.0 100.0 100.0 100.0 98.95 52.83 81.19 65.3
Recall Weighted 83.85 100.0 97.78 84.96 99.51 100.0 100.0 100.0 100.0 99.51 67.45 83.48 75.59
Precision Macro 78.5 100.0 96.05 87.0 99.28 100.0 100.0 100.0 100.0 98.87 63.59 79.05 61.57
Precision Weighted 83.23 100.0 97.79 86.97 99.52 100.0 100.0 100.0 100.0 99.52 71.07 89.74 77.17
Test Accuracy 80.81 95.2 95.57 83.39 96.13 97.42 97.23 97.6 96.49 96.86 64.76 84.5 73.8
F1 Macro 68.97 92.3 92.94 69.71 93.97 95.75 95.57 95.98 94.72 94.64 54.23 79.72 59.46
F1 Weighted 79.11 95.16 95.51 80.63 96.11 97.41 97.23 97.58 96.49 96.86 63.45 85.75 71.23
Recall Macro 69.02 92.08 92.32 70.58 93.72 95.45 95.45 95.69 94.62 94.36 52.03 82.21 64.66
Recall Weighted 80.81 95.2 95.57 83.39 96.13 97.42 97.23 97.6 96.49 96.86 64.76 84.5 73.8
Precision Macro 74.75 92.54 93.69 84.74 94.48 96.07 95.71 96.31 94.82 95.05 62.36 80.62 60.39
Precision Weighted 80.73 95.13 95.53 86.05 96.23 97.41 97.23 97.58 96.48 96.91 68.05 89.78 74.4
In [ ]:
ann_model_non_ac_classroom = ann_model(non_ac_classroom_x, non_ac_classroom_y, test_size = 0.4)
Epoch 1/50, Train Loss: 6.5778, Train Accuracy: 0.0000, Dev Accuracy: 0.0000
Epoch 2/50, Train Loss: 6.1484, Train Accuracy: 0.3921, Dev Accuracy: 0.3579
Epoch 3/50, Train Loss: 5.8990, Train Accuracy: 0.6893, Dev Accuracy: 0.6679
Epoch 4/50, Train Loss: 5.5161, Train Accuracy: 0.7213, Dev Accuracy: 0.7085
Epoch 5/50, Train Loss: 4.8806, Train Accuracy: 0.7275, Dev Accuracy: 0.7159
Epoch 6/50, Train Loss: 4.1998, Train Accuracy: 0.7263, Dev Accuracy: 0.7122
Epoch 7/50, Train Loss: 3.8079, Train Accuracy: 0.7263, Dev Accuracy: 0.7122
Epoch 8/50, Train Loss: 4.3991, Train Accuracy: 0.7263, Dev Accuracy: 0.7122
Epoch 9/50, Train Loss: 3.6251, Train Accuracy: 0.7263, Dev Accuracy: 0.7085
Epoch 10/50, Train Loss: 2.0269, Train Accuracy: 0.7448, Dev Accuracy: 0.7306
Epoch 11/50, Train Loss: 2.9066, Train Accuracy: 0.7275, Dev Accuracy: 0.7122
Epoch 12/50, Train Loss: 2.8730, Train Accuracy: 0.7386, Dev Accuracy: 0.7196
Epoch 13/50, Train Loss: 2.9959, Train Accuracy: 0.7287, Dev Accuracy: 0.7122
Epoch 14/50, Train Loss: 2.4475, Train Accuracy: 0.7423, Dev Accuracy: 0.7306
Epoch 15/50, Train Loss: 1.6522, Train Accuracy: 0.7300, Dev Accuracy: 0.7159
Epoch 16/50, Train Loss: 1.3678, Train Accuracy: 0.7337, Dev Accuracy: 0.7159
Epoch 17/50, Train Loss: 0.6128, Train Accuracy: 0.7411, Dev Accuracy: 0.7232
Epoch 18/50, Train Loss: 1.2833, Train Accuracy: 0.7374, Dev Accuracy: 0.7159
Epoch 19/50, Train Loss: 2.5468, Train Accuracy: 0.7435, Dev Accuracy: 0.7232
Epoch 20/50, Train Loss: 1.5095, Train Accuracy: 0.7596, Dev Accuracy: 0.7454
Epoch 21/50, Train Loss: 0.6802, Train Accuracy: 0.7756, Dev Accuracy: 0.7528
Epoch 22/50, Train Loss: 0.9016, Train Accuracy: 0.7485, Dev Accuracy: 0.7232
Epoch 23/50, Train Loss: 0.7238, Train Accuracy: 0.7608, Dev Accuracy: 0.7343
Epoch 24/50, Train Loss: 1.0489, Train Accuracy: 0.7657, Dev Accuracy: 0.7343
Epoch 25/50, Train Loss: 0.5933, Train Accuracy: 0.7990, Dev Accuracy: 0.7491
Epoch 26/50, Train Loss: 0.5710, Train Accuracy: 0.7818, Dev Accuracy: 0.7491
Epoch 27/50, Train Loss: 1.0082, Train Accuracy: 0.7805, Dev Accuracy: 0.7565
Epoch 28/50, Train Loss: 1.1951, Train Accuracy: 0.7830, Dev Accuracy: 0.7417
Epoch 29/50, Train Loss: 0.5490, Train Accuracy: 0.8052, Dev Accuracy: 0.7528
Epoch 30/50, Train Loss: 0.9078, Train Accuracy: 0.8175, Dev Accuracy: 0.7638
Epoch 31/50, Train Loss: 0.5229, Train Accuracy: 0.7361, Dev Accuracy: 0.7232
Epoch 32/50, Train Loss: 1.5286, Train Accuracy: 0.8311, Dev Accuracy: 0.7675
Epoch 33/50, Train Loss: 0.9227, Train Accuracy: 0.7719, Dev Accuracy: 0.7601
Epoch 34/50, Train Loss: 0.6001, Train Accuracy: 0.8064, Dev Accuracy: 0.7712
Epoch 35/50, Train Loss: 0.5580, Train Accuracy: 0.8631, Dev Accuracy: 0.8118
Epoch 36/50, Train Loss: 1.3681, Train Accuracy: 0.8570, Dev Accuracy: 0.8081
Epoch 37/50, Train Loss: 1.0145, Train Accuracy: 0.7768, Dev Accuracy: 0.7528
Epoch 38/50, Train Loss: 0.6287, Train Accuracy: 0.7448, Dev Accuracy: 0.7232
Epoch 39/50, Train Loss: 1.4899, Train Accuracy: 0.8742, Dev Accuracy: 0.8524
Epoch 40/50, Train Loss: 0.7135, Train Accuracy: 0.8779, Dev Accuracy: 0.8708
Epoch 41/50, Train Loss: 0.7044, Train Accuracy: 0.8742, Dev Accuracy: 0.8598
Epoch 42/50, Train Loss: 0.5358, Train Accuracy: 0.8755, Dev Accuracy: 0.8376
Epoch 43/50, Train Loss: 0.3612, Train Accuracy: 0.8767, Dev Accuracy: 0.8413
Epoch 44/50, Train Loss: 0.2973, Train Accuracy: 0.8829, Dev Accuracy: 0.8598
Epoch 45/50, Train Loss: 0.6728, Train Accuracy: 0.7842, Dev Accuracy: 0.7565
Epoch 46/50, Train Loss: 0.1552, Train Accuracy: 0.8792, Dev Accuracy: 0.8561
Epoch 47/50, Train Loss: 0.4570, Train Accuracy: 0.8816, Dev Accuracy: 0.8524
Epoch 48/50, Train Loss: 0.9370, Train Accuracy: 0.8940, Dev Accuracy: 0.8708
Epoch 49/50, Train Loss: 1.0368, Train Accuracy: 0.8002, Dev Accuracy: 0.8044
Epoch 50/50, Train Loss: 0.5546, Train Accuracy: 0.8890, Dev Accuracy: 0.8598
No description has been provided for this image
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        60
           1       0.94      0.97      0.95        60
           2       0.44      0.67      0.53        18
           3       0.86      0.83      0.84        23
           4       0.50      0.48      0.49        27
           5       0.96      0.86      0.90        83

    accuracy                           0.86       271
   macro avg       0.78      0.80      0.79       271
weighted avg       0.88      0.86      0.86       271

F1-Score: 0.8649
Hidden layers configuration: [16, 8], Accuracy: 0.8597785977859779
Epoch 1/50, Train Loss: 7.0561, Train Accuracy: 0.0000, Dev Accuracy: 0.0000
Epoch 2/50, Train Loss: 6.9467, Train Accuracy: 0.0000, Dev Accuracy: 0.0000
Epoch 3/50, Train Loss: 6.6444, Train Accuracy: 0.1985, Dev Accuracy: 0.1808
Epoch 4/50, Train Loss: 6.0677, Train Accuracy: 0.4797, Dev Accuracy: 0.4207
Epoch 5/50, Train Loss: 6.7883, Train Accuracy: 0.4834, Dev Accuracy: 0.4207
Epoch 6/50, Train Loss: 5.4200, Train Accuracy: 0.4821, Dev Accuracy: 0.4207
Epoch 7/50, Train Loss: 4.9502, Train Accuracy: 0.5018, Dev Accuracy: 0.4502
Epoch 8/50, Train Loss: 4.1433, Train Accuracy: 0.5968, Dev Accuracy: 0.6125
Epoch 9/50, Train Loss: 3.8792, Train Accuracy: 0.6276, Dev Accuracy: 0.6273
Epoch 10/50, Train Loss: 3.7625, Train Accuracy: 0.6326, Dev Accuracy: 0.6384
Epoch 11/50, Train Loss: 4.1851, Train Accuracy: 0.5857, Dev Accuracy: 0.5535
Epoch 12/50, Train Loss: 4.0939, Train Accuracy: 0.6178, Dev Accuracy: 0.5646
Epoch 13/50, Train Loss: 2.5764, Train Accuracy: 0.5857, Dev Accuracy: 0.5424
Epoch 14/50, Train Loss: 2.6895, Train Accuracy: 0.6350, Dev Accuracy: 0.5978
Epoch 15/50, Train Loss: 1.8739, Train Accuracy: 0.6387, Dev Accuracy: 0.6125
Epoch 16/50, Train Loss: 1.8339, Train Accuracy: 0.6979, Dev Accuracy: 0.6974
Epoch 17/50, Train Loss: 4.0362, Train Accuracy: 0.7250, Dev Accuracy: 0.6900
Epoch 18/50, Train Loss: 1.2412, Train Accuracy: 0.6806, Dev Accuracy: 0.6863
Epoch 19/50, Train Loss: 3.9366, Train Accuracy: 0.7263, Dev Accuracy: 0.7196
Epoch 20/50, Train Loss: 2.1340, Train Accuracy: 0.7448, Dev Accuracy: 0.7269
Epoch 21/50, Train Loss: 3.1835, Train Accuracy: 0.7485, Dev Accuracy: 0.7380
Epoch 22/50, Train Loss: 1.7466, Train Accuracy: 0.7509, Dev Accuracy: 0.7380
Epoch 23/50, Train Loss: 2.1988, Train Accuracy: 0.7497, Dev Accuracy: 0.7380
Epoch 24/50, Train Loss: 2.4345, Train Accuracy: 0.7386, Dev Accuracy: 0.7196
Epoch 25/50, Train Loss: 2.1015, Train Accuracy: 0.7349, Dev Accuracy: 0.7343
Epoch 26/50, Train Loss: 2.8902, Train Accuracy: 0.7287, Dev Accuracy: 0.7196
Epoch 27/50, Train Loss: 1.8860, Train Accuracy: 0.7485, Dev Accuracy: 0.7380
Epoch 28/50, Train Loss: 2.3005, Train Accuracy: 0.7472, Dev Accuracy: 0.7380
Epoch 29/50, Train Loss: 1.5570, Train Accuracy: 0.7337, Dev Accuracy: 0.7159
Epoch 30/50, Train Loss: 0.7955, Train Accuracy: 0.7472, Dev Accuracy: 0.7343
Epoch 31/50, Train Loss: 1.2514, Train Accuracy: 0.7719, Dev Accuracy: 0.7565
Epoch 32/50, Train Loss: 1.1841, Train Accuracy: 0.7571, Dev Accuracy: 0.7343
Epoch 33/50, Train Loss: 2.9043, Train Accuracy: 0.7657, Dev Accuracy: 0.7601
Epoch 34/50, Train Loss: 1.8747, Train Accuracy: 0.7337, Dev Accuracy: 0.7159
Epoch 35/50, Train Loss: 1.5866, Train Accuracy: 0.7965, Dev Accuracy: 0.7675
Epoch 36/50, Train Loss: 1.4221, Train Accuracy: 0.8101, Dev Accuracy: 0.7749
Epoch 37/50, Train Loss: 1.2736, Train Accuracy: 0.8187, Dev Accuracy: 0.7786
Epoch 38/50, Train Loss: 0.6414, Train Accuracy: 0.7855, Dev Accuracy: 0.7565
Epoch 39/50, Train Loss: 1.2493, Train Accuracy: 0.8150, Dev Accuracy: 0.7749
Epoch 40/50, Train Loss: 0.1796, Train Accuracy: 0.8274, Dev Accuracy: 0.7823
Epoch 41/50, Train Loss: 0.4022, Train Accuracy: 0.8261, Dev Accuracy: 0.7860
Epoch 42/50, Train Loss: 0.7608, Train Accuracy: 0.8348, Dev Accuracy: 0.7897
Epoch 43/50, Train Loss: 1.3726, Train Accuracy: 0.8483, Dev Accuracy: 0.8229
Epoch 44/50, Train Loss: 1.5783, Train Accuracy: 0.8323, Dev Accuracy: 0.8155
Epoch 45/50, Train Loss: 0.9034, Train Accuracy: 0.7904, Dev Accuracy: 0.7638
Epoch 46/50, Train Loss: 0.8862, Train Accuracy: 0.8767, Dev Accuracy: 0.8487
Epoch 47/50, Train Loss: 0.3232, Train Accuracy: 0.8792, Dev Accuracy: 0.8487
Epoch 48/50, Train Loss: 0.6400, Train Accuracy: 0.8335, Dev Accuracy: 0.8192
Epoch 49/50, Train Loss: 0.9830, Train Accuracy: 0.6301, Dev Accuracy: 0.6273
Epoch 50/50, Train Loss: 0.4923, Train Accuracy: 0.8730, Dev Accuracy: 0.8598
No description has been provided for this image
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        60
           1       0.84      0.97      0.90        60
           2       0.48      0.78      0.60        18
           3       0.95      0.83      0.88        23
           4       0.45      0.33      0.38        27
           5       1.00      0.88      0.94        83

    accuracy                           0.86       271
   macro avg       0.79      0.80      0.78       271
weighted avg       0.87      0.86      0.86       271

F1-Score: 0.8599
Hidden layers configuration: [32, 16, 8], Accuracy: 0.8597785977859779
Epoch 1/50, Train Loss: 6.4835, Train Accuracy: 0.1899, Dev Accuracy: 0.1993
Epoch 2/50, Train Loss: 6.2137, Train Accuracy: 0.5598, Dev Accuracy: 0.4908
Epoch 3/50, Train Loss: 5.5238, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 4/50, Train Loss: 5.3057, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 5/50, Train Loss: 4.2484, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 6/50, Train Loss: 3.9475, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 7/50, Train Loss: 3.7466, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 8/50, Train Loss: 2.8665, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 9/50, Train Loss: 2.8141, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 10/50, Train Loss: 2.0393, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 11/50, Train Loss: 3.1371, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 12/50, Train Loss: 1.5216, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 13/50, Train Loss: 3.3945, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 14/50, Train Loss: 3.0591, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 15/50, Train Loss: 2.5538, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 16/50, Train Loss: 2.7765, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 17/50, Train Loss: 2.2761, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 18/50, Train Loss: 2.4770, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 19/50, Train Loss: 1.5217, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 20/50, Train Loss: 2.8558, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 21/50, Train Loss: 1.4355, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 22/50, Train Loss: 2.9788, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 23/50, Train Loss: 2.6036, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 24/50, Train Loss: 1.5833, Train Accuracy: 0.5783, Dev Accuracy: 0.5277
Epoch 25/50, Train Loss: 1.2470, Train Accuracy: 0.6079, Dev Accuracy: 0.5609
Epoch 26/50, Train Loss: 1.8446, Train Accuracy: 0.7238, Dev Accuracy: 0.7122
Epoch 27/50, Train Loss: 1.0970, Train Accuracy: 0.8200, Dev Accuracy: 0.7786
Epoch 28/50, Train Loss: 1.5065, Train Accuracy: 0.7953, Dev Accuracy: 0.7491
Epoch 29/50, Train Loss: 1.1908, Train Accuracy: 0.8113, Dev Accuracy: 0.7638
Epoch 30/50, Train Loss: 0.7517, Train Accuracy: 0.7916, Dev Accuracy: 0.7601
Epoch 31/50, Train Loss: 0.4835, Train Accuracy: 0.8101, Dev Accuracy: 0.7638
Epoch 32/50, Train Loss: 0.9260, Train Accuracy: 0.8076, Dev Accuracy: 0.7786
Epoch 33/50, Train Loss: 1.1875, Train Accuracy: 0.8261, Dev Accuracy: 0.8007
Epoch 34/50, Train Loss: 0.9645, Train Accuracy: 0.8052, Dev Accuracy: 0.7786
Epoch 35/50, Train Loss: 0.9682, Train Accuracy: 0.7818, Dev Accuracy: 0.7380
Epoch 36/50, Train Loss: 0.6575, Train Accuracy: 0.8212, Dev Accuracy: 0.7823
Epoch 37/50, Train Loss: 1.0580, Train Accuracy: 0.8138, Dev Accuracy: 0.7749
Epoch 38/50, Train Loss: 0.4384, Train Accuracy: 0.8064, Dev Accuracy: 0.7860
Epoch 39/50, Train Loss: 1.2047, Train Accuracy: 0.8286, Dev Accuracy: 0.8007
Epoch 40/50, Train Loss: 0.2437, Train Accuracy: 0.7855, Dev Accuracy: 0.7565
Epoch 41/50, Train Loss: 0.4326, Train Accuracy: 0.8101, Dev Accuracy: 0.7786
Epoch 42/50, Train Loss: 0.3186, Train Accuracy: 0.8570, Dev Accuracy: 0.8339
Epoch 43/50, Train Loss: 0.7712, Train Accuracy: 0.7978, Dev Accuracy: 0.7897
Epoch 44/50, Train Loss: 0.3090, Train Accuracy: 0.8730, Dev Accuracy: 0.8635
Epoch 45/50, Train Loss: 1.0190, Train Accuracy: 0.8397, Dev Accuracy: 0.8118
Epoch 46/50, Train Loss: 0.3706, Train Accuracy: 0.8533, Dev Accuracy: 0.8339
Epoch 47/50, Train Loss: 0.3101, Train Accuracy: 0.8434, Dev Accuracy: 0.8303
Epoch 48/50, Train Loss: 0.4496, Train Accuracy: 0.8089, Dev Accuracy: 0.8081
Epoch 49/50, Train Loss: 0.6742, Train Accuracy: 0.8508, Dev Accuracy: 0.8561
Epoch 50/50, Train Loss: 0.3206, Train Accuracy: 0.8841, Dev Accuracy: 0.8745
No description has been provided for this image
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        60
           1       0.89      0.95      0.92        60
           2       0.54      0.72      0.62        18
           3       0.89      0.74      0.81        23
           4       0.50      0.44      0.47        27
           5       0.97      0.94      0.96        83

    accuracy                           0.87       271
   macro avg       0.80      0.80      0.80       271
weighted avg       0.88      0.87      0.87       271

F1-Score: 0.8748
Hidden layers configuration: [64, 32, 16, 8], Accuracy: 0.8745387453874539
Best model hidden layers configuration: Sequential(
  (0): BatchNorm1d(9, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (1): Linear(in_features=9, out_features=64, bias=True)
  (2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=32, bias=True)
  (5): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU()
  (7): Linear(in_features=32, out_features=16, bias=True)
  (8): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (9): ReLU()
  (10): Linear(in_features=16, out_features=8, bias=True)
  (11): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (12): ReLU()
  (13): Linear(in_features=8, out_features=1353, bias=True)
), Best accuracy: 0.8745387453874539
Test Accuracy: 0.8782287822878229
In [ ]:
evaluate_ann(ann_model_non_ac_classroom, *train_test_split(non_ac_classroom_x, non_ac_classroom_y, test_size=0.2, random_state=42))[1]
Out[ ]:
ANN
Metrics
Train Accuracy 88.35
F1 Macro 78.96
F1 Weighted 88.31
Recall Macro 78.78
Recall Weighted 88.35
Precision Macro 80.25
Precision Weighted 88.79
Test Accuracy 87.08
F1 Macro 80.34
F1 Weighted 87.0
Recall Macro 81.08
Recall Weighted 87.08
Precision Macro 81.0
Precision Weighted 87.61
In [ ]:
metric_to_show = 'Accuracy'

result_non_ac_classroom_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(result_non_ac_classroom_hyper_tuned.columns.to_numpy())), result_non_ac_classroom_hyper_tuned.columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
metric_to_show = 'Accuracy'       # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted

fig, ax = plt.subplots(figsize=(10, 6))

# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
result_non_ac_classroom_hyper_tuned.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')

# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
result_non_ac_classroom.loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))

# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(result_non_ac_classroom.columns.to_numpy())), result_non_ac_classroom.columns.to_numpy(), rotation=90)
plt.show();
No description has been provided for this image
In [ ]:
model_names, models = [], []
for model_name in evaluator_non_ac_classroom_hyper_tuned.model_names:
    model_names.append(model_name)
    models.append(evaluator_non_ac_classroom_hyper_tuned.models[model_name].named_steps['classifier'])
plot_feature_importances(models, model_names, df_tmp.columns.to_numpy()[:-1])
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

Objects ->

  • evaluator_non_ac_classroom
  • evaluator_non_ac_classroom_hyper_tuned

Results ->

  • result_non_ac_classroom
  • result_non_ac_classroom_hyper_tuned

Saving ML models¶

In [ ]:
with open("ac_lab_original_models_object_40_test.pkl", "wb") as file:
    joblib.dump(evaluator_ac_lab, file)

with open("ac_lab_hypertuned_models_object_40_test.pkl", "wb") as file:
    joblib.dump(evaluator_ac_lab_hyper_tuned, file)

with open("ac_classroom_original_models_object_40_test.pkl", "wb") as file:
    joblib.dump(evaluator_ac_classroom, file)

with open("ac_classroom_hypertuned_models_object_40_test.pkl", "wb") as file:
    joblib.dump(evaluator_ac_classroom_hyper_tuned, file)

with open("non_ac_classroom_original_models_object_40_test.pkl", "wb") as file:
    joblib.dump(evaluator_non_ac_classroom, file)

with open("non_ac_classroom_hypertuned_models_object_40_test.pkl", "wb") as file:
    joblib.dump(evaluator_non_ac_classroom_hyper_tuned, file)
In [ ]:
with open("ac_lab_ann_model_object_40_test.pkl", "wb") as file:
    torch.save(ann_model_ac_lab, file)

with open("ac_classroom_ann_model_object_40_test.pkl", "wb") as file:
    torch.save(ann_model_ac_classroom, file)

with open("non_ac_classroom_ann_model_object_40_test.pkl", "wb") as file:
    torch.save(ann_model_non_ac_classroom, file)
In [ ]:
# checking

with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
    tmp = joblib.load(file)

model_name = 'MLP Neural Net'
scores = tmp.get_metric_scores(model_name)
print(f'Metric Scores for Model {model_name}:')
for metric, score in scores.items():
    print(f'{metric}: {score}')
Metric Scores for Model MLP Neural Net:
Train Accuracy: 0.990558615263572
Train F1 Macro: 0.9763425165179429
Train F1 Weighted: 0.9904363609691609
Train Recall Macro: 0.9641764863338181
Train Recall Weighted: 0.990558615263572
Train Precision Macro: 0.990815235430536
Train Precision Weighted: 0.9906684296202051
Train Confusion Matrix: [[1203    7    0    0]
 [   5  735    1    0]
 [   0    0  506    0]
 [   0    0   11   74]]
Test Accuracy: 0.9746462264150944
Test F1 Macro: 0.948302223342338
Test F1 Weighted: 0.9744902650792115
Test Recall Macro: 0.9373216576195762
Test Recall Weighted: 0.9746462264150944
Test Precision Macro: 0.9612730972945318
Test Precision Weighted: 0.9746150649055128
Test Confusion Matrix: [[799  15   0   0]
 [  9 451   3   0]
 [  0   4 364   3]
 [  0   0   9  39]]
In [ ]:
# checking
ob = torch.load("/content/ac_lab_ann_model_object_40_test.pkl")
evaluate_ann(ob, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]
Out[ ]:
ANN
Metrics
Train Accuracy 88.12
F1 Macro 82.65
F1 Weighted 87.59
Recall Macro 80.43
Recall Weighted 88.12
Precision Macro 87.01
Precision Weighted 88.5
Test Accuracy 87.38
F1 Macro 81.12
F1 Weighted 86.96
Recall Macro 78.81
Recall Weighted 87.38
Precision Macro 85.23
Precision Weighted 87.7

Merging ANN with other models¶

AC LAB¶

In [ ]:
with open("/content/ac_lab_original_models_object_40_test.pkl", "rb") as file:
    tmp1 = joblib.load(file)

tmp1 = evaluate_result(tmp1)

ob = torch.load("/content/ac_lab_ann_model_object_40_test.pkl")
tmp2 = evaluate_ann(ob, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]


tmp1.join(tmp2)
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA ANN
Metrics
Train Accuracy 75.18 97.29 83.32 73.25 100.0 100.0 99.92 99.84 100.0 86.31 42.25 56.45 33.4 88.12
F1 Macro 60.59 96.02 75.55 53.63 100.0 100.0 99.79 99.73 100.0 79.19 22.97 41.23 7.15 82.65
F1 Weighted 72.13 97.28 82.42 69.2 100.0 100.0 99.92 99.84 100.0 85.64 31.42 50.4 16.72 87.59
Recall Macro 60.89 95.45 72.76 56.39 100.0 100.0 99.79 99.88 100.0 77.41 33.63 50.38 14.29 80.43
Recall Weighted 75.18 97.29 83.32 73.25 100.0 100.0 99.92 99.84 100.0 86.31 42.25 56.45 33.4 88.12
Precision Macro 67.28 96.65 86.75 52.53 100.0 100.0 99.79 99.59 100.0 85.67 18.26 58.89 4.77 87.01
Precision Weighted 71.66 97.29 84.91 66.94 100.0 100.0 99.92 99.84 100.0 87.01 26.59 64.75 11.15 88.5
Test Accuracy 76.65 93.99 83.67 73.94 97.29 98.41 98.17 97.7 97.88 86.5 43.51 57.49 34.91 87.38
F1 Macro 61.96 90.37 74.4 53.66 95.39 97.78 97.11 96.68 96.58 78.88 23.29 41.67 7.39 81.12
F1 Weighted 73.78 93.96 82.78 70.18 97.29 98.41 98.17 97.7 97.87 85.87 33.21 52.43 18.06 86.96
Recall Macro 61.53 89.84 71.37 55.85 95.3 97.68 96.87 96.45 96.0 77.28 33.61 51.34 14.29 78.81
Recall Weighted 76.65 93.99 83.67 73.94 97.29 98.41 98.17 97.7 97.88 86.5 43.51 57.49 34.91 87.38
Precision Macro 68.13 90.96 86.34 53.12 95.54 97.91 97.38 96.94 97.26 84.01 18.84 58.82 4.99 85.23
Precision Weighted 72.93 93.96 85.18 68.18 97.32 98.42 98.19 97.72 97.89 86.93 28.76 66.36 12.18 87.7
In [ ]:
metric_to_show = 'Accuracy'

tmp1.join(tmp2).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
    tmp3 = joblib.load(file)

tmp3 = evaluate_result(tmp3)

ob = torch.load("/content/ac_lab_ann_model_object_40_test.pkl")
tmp4 = evaluate_ann(ob, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]


tmp3.join(tmp4)
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA ANN
Metrics
Train Accuracy 77.62 100.0 95.83 86.15 100.0 99.57 99.92 99.84 100.0 97.25 69.79 65.38 70.02 88.12
F1 Macro 70.58 100.0 94.37 80.13 100.0 99.4 99.79 99.73 100.0 95.69 53.34 60.57 59.54 82.65
F1 Weighted 75.69 100.0 95.82 85.09 100.0 99.57 99.92 99.84 100.0 97.23 66.48 65.16 68.01 87.59
Recall Macro 70.03 100.0 93.44 77.99 100.0 99.34 99.79 99.88 100.0 94.71 55.56 64.78 63.49 80.43
Recall Weighted 77.62 100.0 95.83 86.15 100.0 99.57 99.92 99.84 100.0 97.25 69.79 65.38 70.02 88.12
Precision Macro 75.52 100.0 95.51 88.78 100.0 99.47 99.79 99.59 100.0 96.87 58.95 65.15 59.2 87.01
Precision Weighted 76.01 100.0 95.89 86.75 100.0 99.57 99.92 99.84 100.0 97.29 67.37 68.9 67.77 88.5
Test Accuracy 78.24 96.52 93.93 86.32 96.82 97.88 98.23 97.88 97.82 94.93 71.17 64.45 69.99 87.38
F1 Macro 69.87 95.01 91.52 80.41 94.21 97.41 97.22 97.05 96.53 91.72 54.42 58.22 58.62 81.12
F1 Weighted 76.42 96.52 93.96 85.46 96.79 97.88 98.23 97.88 97.81 94.88 68.36 64.65 68.33 86.96
Recall Macro 68.92 95.14 91.13 78.23 93.37 97.45 96.96 96.88 95.93 90.65 55.72 63.22 63.06 78.81
Recall Weighted 78.24 96.52 93.93 86.32 96.82 97.88 98.23 97.88 97.82 94.93 71.17 64.45 69.99 87.38
Precision Macro 75.92 94.89 92.09 88.26 95.27 97.41 97.51 97.24 97.24 92.97 63.21 62.25 59.02 85.23
Precision Weighted 77.03 96.53 94.05 87.04 96.81 97.91 98.24 97.89 97.84 94.9 70.74 68.73 68.68 87.7
In [ ]:
metric_to_show = 'Accuracy'

tmp3.join(tmp4).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp3.join(tmp4).columns.to_numpy())), tmp3.join(tmp4).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
metric_to_show = 'Accuracy'       # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted

fig, ax = plt.subplots(figsize=(10, 6))

# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
tmp3.join(tmp4).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')

# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
tmp1.join(tmp2).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))

# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.show();
No description has been provided for this image

AC LAB FOR RESEARCH PAPER¶

In [ ]:
with open("/content/ac_lab_original_models_object_40_test.pkl", "rb") as file:
    tmp1 = joblib.load(file)

tmp1 = evaluate_result(tmp1)

ob = torch.load("/content/ac_lab_ann_model_object_40_test.pkl")
tmp2 = evaluate_ann(ob, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]

qwe = tmp1.join(tmp2).loc['Test'].loc[['Accuracy', 'F1 Weighted', 'Recall Weighted', 'Precision Weighted']][
    ['KNN', 'SVM', 'DecisionTree', 'RandomForest', 'XGB', 'GradientBoosting', 'MLP Neural Net', 'ANN']].T
qwe['F1 Weighted'] = qwe['F1 Weighted'].map(lambda x: round(x / 100, 2))
qwe['Recall Weighted'] = qwe['Recall Weighted'].map(lambda x: round(x / 100, 2))
qwe['Precision Weighted'] = qwe['Precision Weighted'].map(lambda x: round(x / 100, 2))
qwe
Out[ ]:
Metrics Accuracy F1 Weighted Recall Weighted Precision Weighted
KNN 96.7 0.97 0.97 0.97
SVM 91.16 0.91 0.91 0.91
DecisionTree 98.11 0.98 0.98 0.98
RandomForest 98.82 0.99 0.99 0.99
XGB 99.23 0.99 0.99 0.99
GradientBoosting 98.58 0.99 0.99 0.99
MLP Neural Net 91.69 0.91 0.92 0.92
ANN 93.4 0.93 0.93 0.94
In [ ]:
with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
    tmp3 = joblib.load(file)

tmp3 = evaluate_result(tmp3)

ob = torch.load("/content/ac_lab_ann_model_object_40_test.pkl")
tmp4 = evaluate_ann(ob, *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[1]

qwe_h = tmp3.join(tmp4).loc['Test'].loc[['Accuracy', 'F1 Weighted', 'Recall Weighted', 'Precision Weighted']][['KNN', 'SVM', 'DecisionTree', 'RandomForest', 'XGB', 'GradientBoosting', 'MLP Neural Net', 'ANN']].T
qwe_h['F1 Weighted'] = qwe_h['F1 Weighted'].map(lambda x: round(x / 100, 2))
qwe_h['Recall Weighted'] = qwe_h['Recall Weighted'].map(lambda x: round(x / 100, 2))
qwe_h['Precision Weighted'] = qwe_h['Precision Weighted'].map(lambda x: round(x / 100, 2))
qwe_h
Out[ ]:
Metrics Accuracy F1 Weighted Recall Weighted Precision Weighted
KNN 97.76 0.98 0.98 0.98
SVM 96.99 0.97 0.97 0.97
DecisionTree 98.82 0.99 0.99 0.99
RandomForest 98.41 0.98 0.98 0.98
XGB 99.17 0.99 0.99 0.99
GradientBoosting 98.76 0.99 0.99 0.99
MLP Neural Net 97.46 0.97 0.97 0.97
ANN 93.4 0.93 0.93 0.94
In [ ]:
# Sample data
models = qwe.rename(index = {'RandomForest': 'RF', 'MLP Neural Net': 'MLP', 'GradientBoosting': 'GBM', 'DecisionTree': 'DT'}).index.tolist()
accuracy_nontuned = qwe['Accuracy']
accuracy_tuned = qwe_h['Accuracy']

# Set the figure size
fig, ax = plt.subplots(figsize=(10, 4))

bar_width = 0.35
index = np.arange(len(models))

# Plotting the bars
bars1 = ax.bar(index, accuracy_nontuned, width=bar_width, label='Non-Tuned')
bars2 = ax.bar(index + bar_width, accuracy_tuned, width=bar_width, label='Tuned')

# Annotate the bars with accuracy values
for bar, acc in zip(bars1, accuracy_nontuned):
    height = bar.get_height()
    ax.annotate(f'{int(acc)}%', xy=(bar.get_x() + bar.get_width() / 2, height),
                xytext=(0, 3),  # 3 points vertical offset
                textcoords="offset points",
                ha='center', va='bottom', fontsize=10.5)

for bar, acc in zip(bars2, accuracy_tuned):
    height = bar.get_height()
    ax.annotate(f'{int(acc)}%', xy=(bar.get_x() + bar.get_width() / 2, height),
                xytext=(0, 3),  # 3 points vertical offset
                textcoords="offset points",
                ha='center', va='bottom', fontsize=10.5)


# Set labels and title
ax.set_xlabel('Models', fontsize=15)
ax.set_ylabel('Accuracy (%)', fontsize=15)

# Rotate x-axis labels for better readability
ax.set_xticks(index + bar_width / 2)
ax.set_xticklabels(models, rotation=45, fontsize=15)
ax.tick_params(axis='y', which='both', labelsize=15)
# Add legend
ax.legend(loc='lower right')
plt.subplots_adjust(top=1.3)

# Show the plot
plt.show()
No description has been provided for this image
In [ ]:
with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
    tmp3 = joblib.load(file)

est_mlp = lambda x: (x - np.min(x)) / (np.max(x) - np.min(x))

feature_importances_dt = tmp3.models['DecisionTree'].named_steps['classifier'].feature_importances_
feature_importances_rf = tmp3.models['RandomForest'].named_steps['classifier'].feature_importances_
feature_importances_xgb = tmp3.models['XGB'].named_steps['classifier'].feature_importances_
feature_importances_gb = tmp3.models['GradientBoosting'].named_steps['classifier'].feature_importances_

all_feature_importances = np.vstack((feature_importances_dt, feature_importances_rf,
                                     feature_importances_xgb, feature_importances_gb))

mlp_feature_importances = est_mlp([np.sum(np.abs(layer), axis=1) for layer in [layer / np.linalg.norm(layer, ord=2, axis=0)
                            for layer in tmp3.models['MLP Neural Net'].named_steps['classifier'].coefs_]][0])

all_feature_importances = np.vstack((all_feature_importances, mlp_feature_importances))

fig, ax = plt.subplots(figsize=(8, 4))

# Plotting the bars
bars = ax.bar(df_tmp.columns[:-1].to_list(), np.median(all_feature_importances, axis=0))

ax.set_xlabel('Features', fontsize=15)
ax.set_ylabel('Median Weightage', fontsize=15)

plt.legend().set_visible(False)
ax.set_xticklabels(df_tmp.columns[:-1].to_list(), rotation=45, fontsize=12)
ax.tick_params(axis='y', which='both', labelsize=15)

# Annotate the bars with their values
for bar in bars:
    height = bar.get_height()
    ax.annotate(f'{height:.2f}', xy=(bar.get_x() + bar.get_width() / 2, height),
                xytext=(0, 3), textcoords="offset points",
                ha='center', va='bottom', fontsize=10)

# Show the plot
plt.subplots_adjust(top=1.2)
plt.show()
WARNING:matplotlib.legend:No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No description has been provided for this image
In [ ]:
def calculate_class_accuracy(obj = None, confusion_matrix_ann = None):
    each_model_each_class_accuracy, res = {}, None
    if obj is not None:
        for model in obj.model_names:
            confusion_matrix = obj.get_metric_scores(model)['Test Confusion Matrix']
            num_classes, class_accuracies = len(confusion_matrix), {}
            for i in range(num_classes):
                TP = confusion_matrix[i, i]
                FP = sum(confusion_matrix[:, i]) - TP
                FN = sum(confusion_matrix[i, :]) - TP
                TN = np.sum(confusion_matrix) - TP - FP - FN
                total_samples = TP + TN + FP + FN
                accuracy = (TP + TN) / total_samples
                class_accuracies[f'Class {i + 1}'] = {'Accuracy (%)': round(accuracy * 100.00, 2),
                                                      'Total Samples': total_samples,
                                                      'Total Correct Samples Predicted': TP + TN}
            each_model_each_class_accuracy[model] = class_accuracies
    if confusion_matrix_ann is not None:
        num_classes, class_accuracies = len(confusion_matrix_ann), {}
        for i in range(num_classes):
            TP = confusion_matrix_ann[i, i]
            FP = sum(confusion_matrix_ann[:, i]) - TP
            FN = sum(confusion_matrix_ann[i, :]) - TP
            TN = np.sum(confusion_matrix_ann) - TP - FP - FN
            total_samples = TP + TN + FP + FN
            accuracy = (TP + TN) / total_samples
            class_accuracies[f'Class {i}'] = {'Accuracy (%)': round(accuracy * 100.00, 2),
                                              'Total Samples': total_samples,
                                              'Total Correct Samples Predicted': TP + TN}
        each_model_each_class_accuracy['ANN'] = class_accuracies
    for model_name in each_model_each_class_accuracy:
        tmp = pd.DataFrame(each_model_each_class_accuracy[model_name]).T.stack(0).reset_index().rename(
            columns = {'level_0': 'Class', 'level_1': 'Attributes', 0: model_name}).set_index(
                ['Class', 'Attributes'], drop = True)
        if res is None:
            res = tmp
        else:
            res = res.join(tmp)
    return res

with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
    tmp1 = joblib.load(file)

tmp2 = evaluate_ann(torch.load("/content/ac_lab_ann_model_object_40_test.pkl"),
                    *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[0]['Test Confusion Matrix']

class_accuracy_df = calculate_class_accuracy(tmp1, tmp2).query("Attributes == 'Accuracy (%)'")

data_to_plot = class_accuracy_df[['XGB', 'DecisionTree', 'RandomForest', 'GradientBoosting', 'MLP Neural Net']].rename(columns = {'DecisionTree': 'DT', 'RandomForest': 'RF', 'GradientBoosting': 'GBM', 'MLP Neural Net': 'MLP'})

ax = data_to_plot.plot(marker='o', figsize=(10, 6))

for model in data_to_plot.columns:
    for index, value in enumerate(data_to_plot[model]):
        ax.annotate(f'{value:.2f}%', (index, value), textcoords="offset points", xytext=(0, 5), ha='center', fontsize=11)

new_labels = data_to_plot.index.get_level_values(0).to_list()
ax.set_xticks(range(len(new_labels)))
ax.set_xticklabels(new_labels, fontsize=18)
ax.set_xlabel('Occupancy Levels', fontsize=18)
ax.set_ylabel('Accuracy (%)', fontsize=18)
ax.tick_params(axis='y', which='both', labelsize=18);
ax.legend(fontsize='large')
plt.show()
No description has been provided for this image
In [ ]:
def calculate_class_accuracy(obj = None, confusion_matrix_ann = None):
    each_model_each_class_accuracy = {}
    res = None
    if obj is not None:
        confusion_matrix = None
        c = 0
        for model in ['XGB', 'DecisionTree', 'RandomForest', 'GradientBoosting', 'MLP Neural Net']:
            if confusion_matrix is not None:
                confusion_matrix = np.add(confusion_matrix, obj.get_metric_scores(model)['Test Confusion Matrix'])
            else:
                confusion_matrix = obj.get_metric_scores(model)['Test Confusion Matrix']
            c += 1
    if confusion_matrix_ann is not None:
        confusion_matrix = np.add(confusion_matrix, confusion_matrix_ann)
        c += 1
    return (confusion_matrix // c)

with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
    tmp1 = joblib.load(file)

tmp2 = evaluate_ann(torch.load("/content/ac_lab_ann_model_object_40_test.pkl"),
                    *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[0]['Test Confusion Matrix']

all_value_count = Counter(train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42)[3])
total = sum(all_value_count.values())

cm = calculate_class_accuracy(tmp1, tmp2)
fig, ax = plt.subplots()
ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)

ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=[f"Class {i}" for i in range(1, cm.shape[1] + 1)], yticklabels=[f"Class {i}\n ({((all_value_count[i - 1] / total) * 100.00):.2f} %)" for i in range(1, cm.shape[0] + 1)],
           ylabel='True label',
           xlabel='Predicted label')

fmt = 'd'
thresh = cm.max() / 2.
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        ax.text(j, i, format(cm[i, j], fmt),
                ha="center", va="center",
                color="white" if cm[i, j] > thresh else "black")
fig.tight_layout()
plt.xlim(-0.5, len(np.unique(ac_lab_y))-0.5)
plt.ylim(len(np.unique(ac_lab_y))-0.5, -0.5)
np.set_printoptions(precision=2)

plt.show();
No description has been provided for this image

AC CLASSROOM¶

In [ ]:
with open("/content/ac_classroom_original_models_object_40_test.pkl", "rb") as file:
    tmp1 = joblib.load(file)

tmp1 = evaluate_result(tmp1)

ob = torch.load("/content/ac_classroom_ann_model_object_40_test.pkl")
tmp2 = evaluate_ann(ob, *train_test_split(ac_classroom_x, ac_classroom_y, test_size=0.4, random_state=42))[1]


tmp1.join(tmp2)
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA ANN
Metrics
Train Accuracy 72.55 96.28 81.6 67.67 99.8 99.8 99.6 99.5 99.5 82.45 51.63 33.69 2.87 81.8
F1 Macro 64.14 95.59 72.96 48.36 99.73 99.73 99.6 99.43 99.27 72.35 39.43 31.88 0.93 72.38
F1 Weighted 70.28 96.27 79.18 64.7 99.8 99.8 99.6 99.5 99.49 79.88 40.88 22.22 0.16 80.58
Recall Macro 63.54 95.43 73.61 50.68 99.77 99.56 99.45 99.28 98.95 72.76 41.17 49.44 16.67 74.18
Recall Weighted 72.55 96.28 81.6 67.67 99.8 99.8 99.6 99.5 99.5 82.45 51.63 33.69 2.87 81.8
Precision Macro 66.48 95.76 88.99 62.64 99.69 99.9 99.75 99.58 99.61 72.5 53.73 45.86 0.48 78.85
Precision Weighted 68.57 96.27 83.33 65.35 99.8 99.8 99.6 99.5 99.5 77.74 46.8 61.21 0.08 82.45
Test Accuracy 73.1 93.75 83.35 69.86 97.66 97.59 97.51 97.59 97.81 84.4 50.26 34.06 2.19 83.27
F1 Macro 63.72 91.0 73.73 50.12 95.8 95.58 96.18 96.32 96.37 73.03 38.82 31.03 0.71 71.13
F1 Weighted 71.31 93.78 81.47 67.45 97.71 97.58 97.52 97.61 97.82 82.33 39.26 22.91 0.09 82.46
Recall Macro 62.02 90.87 73.63 50.75 96.99 95.05 96.52 96.55 96.23 72.65 40.38 48.55 16.67 73.79
Recall Weighted 73.1 93.75 83.35 69.86 97.66 97.59 97.51 97.59 97.81 84.4 50.26 34.06 2.19 83.27
Precision Macro 67.42 91.22 90.05 64.89 94.84 96.13 95.88 96.13 96.52 73.93 54.71 46.04 0.36 79.1
Precision Weighted 70.03 93.86 84.53 67.62 97.83 97.58 97.55 97.65 97.85 80.49 46.79 64.49 0.05 84.73
In [ ]:
metric_to_show = 'Accuracy'

tmp1.join(tmp2).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
with open("/content/ac_classroom_hypertuned_models_object_40_test.pkl", "rb") as file:
    tmp3 = joblib.load(file)

tmp3 = evaluate_result(tmp3)

ob = torch.load("/content/ac_classroom_ann_model_object_40_test.pkl")
tmp4 = evaluate_ann(ob, *train_test_split(ac_classroom_x, ac_classroom_y, test_size=0.4, random_state=42))[1]


tmp3.join(tmp4)
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA ANN
Metrics
Train Accuracy 75.87 97.23 94.87 80.19 99.04 99.35 99.6 99.3 99.8 98.54 52.99 54.4 72.75 81.8
F1 Macro 72.98 96.43 92.7 78.53 98.47 99.09 99.6 99.1 99.73 98.15 39.83 52.32 71.13 72.38
F1 Weighted 74.38 97.22 94.69 79.81 99.05 99.34 99.6 99.29 99.8 98.54 48.06 49.99 73.09 80.58
Recall Macro 72.29 96.27 91.5 77.61 98.37 98.79 99.45 98.95 99.66 98.11 42.47 57.79 72.7 74.18
Recall Weighted 75.87 97.23 94.87 80.19 99.04 99.35 99.6 99.3 99.8 98.54 52.99 54.4 72.75 81.8
Precision Macro 84.69 96.61 94.64 80.14 98.57 99.4 99.75 99.25 99.8 98.19 46.55 56.94 72.67 78.85
Precision Weighted 77.22 97.22 94.82 79.73 99.05 99.34 99.6 99.29 99.8 98.55 54.57 58.54 77.45 82.45
Test Accuracy 75.96 94.72 94.8 79.5 97.06 97.59 97.59 97.51 97.51 96.01 56.52 57.27 76.79 83.27
F1 Macro 73.3 91.94 92.45 75.84 95.02 96.06 96.26 96.07 95.86 94.25 42.46 53.0 74.12 71.13
F1 Weighted 74.89 94.75 94.73 79.38 97.11 97.58 97.61 97.52 97.54 96.04 52.01 54.01 77.68 82.46
Recall Macro 71.63 92.23 90.96 74.18 95.17 95.26 96.77 95.75 96.07 94.66 44.66 58.64 75.93 73.79
Recall Weighted 75.96 94.72 94.8 79.5 97.06 97.59 97.59 97.51 97.51 96.01 56.52 57.27 76.79 83.27
Precision Macro 84.57 91.74 94.26 77.79 95.06 96.91 95.8 96.41 95.7 93.92 50.62 57.86 75.49 79.1
Precision Weighted 76.83 94.82 94.77 79.44 97.22 97.59 97.64 97.54 97.59 96.11 59.99 64.08 82.63 84.73
In [ ]:
metric_to_show = 'Accuracy'

tmp3.join(tmp4).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp3.join(tmp4).columns.to_numpy())), tmp3.join(tmp4).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
metric_to_show = 'Accuracy'       # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted

fig, ax = plt.subplots(figsize=(10, 6))

# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
tmp3.join(tmp4).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')

# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
tmp1.join(tmp2).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))

# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.show();
No description has been provided for this image

NON AC CLASSROOM¶

In [ ]:
with open("/content/non_ac_classroom_original_models_object_40_test.pkl", "rb") as file:
    tmp1 = joblib.load(file)

tmp1 = evaluate_result(tmp1)

ob = torch.load("/content/non_ac_classroom_ann_model_object_40_test.pkl")
tmp2 = evaluate_ann(ob, *train_test_split(non_ac_classroom_x, non_ac_classroom_y, test_size=0.4, random_state=42))[1]


tmp1.join(tmp2)
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA ANN
Metrics
Train Accuracy 77.07 97.53 82.12 71.89 100.0 100.0 100.0 100.0 100.0 77.07 69.54 81.13 72.38 88.41
F1 Macro 55.99 96.07 68.99 41.59 100.0 100.0 100.0 100.0 100.0 53.1 63.58 75.12 56.02 79.12
F1 Weighted 72.66 97.54 80.4 64.24 100.0 100.0 100.0 100.0 100.0 71.31 70.84 82.33 67.14 88.35
Recall Macro 58.56 96.28 71.11 47.77 100.0 100.0 100.0 100.0 100.0 58.03 63.04 78.52 58.6 79.05
Recall Weighted 77.07 97.53 82.12 71.89 100.0 100.0 100.0 100.0 100.0 77.07 69.54 81.13 72.38 88.41
Precision Macro 60.52 95.87 72.52 54.42 100.0 100.0 100.0 100.0 100.0 49.25 71.41 78.15 65.99 80.14
Precision Weighted 72.39 97.55 81.89 68.01 100.0 100.0 100.0 100.0 100.0 66.75 78.37 88.87 74.67 88.74
Test Accuracy 76.2 96.31 79.34 71.59 96.13 97.23 97.6 97.6 96.31 75.83 68.27 82.29 71.03 87.64
F1 Macro 55.9 94.21 66.44 42.59 93.8 95.39 96.32 96.12 94.49 53.3 61.31 77.98 57.81 79.59
F1 Weighted 70.65 96.23 77.14 62.6 96.12 97.22 97.61 97.6 96.31 69.19 68.99 83.54 66.13 87.61
Recall Macro 59.84 93.87 68.1 48.84 93.85 95.13 96.08 95.78 94.53 59.38 59.77 80.44 60.95 79.67
Recall Weighted 76.2 96.31 79.34 71.59 96.13 97.23 97.6 97.6 96.31 75.83 68.27 82.29 71.03 87.64
Precision Macro 61.06 94.76 69.4 54.63 93.78 95.66 96.6 96.48 94.48 48.88 68.43 80.5 65.16 80.92
Precision Weighted 71.09 96.29 77.85 64.59 96.12 97.21 97.63 97.61 96.32 64.09 73.85 89.17 73.0 88.27
In [ ]:
metric_to_show = 'Accuracy'

tmp1.join(tmp2).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
with open("/content/non_ac_classroom_hypertuned_models_object_40_test.pkl", "rb") as file:
    tmp3 = joblib.load(file)

tmp3 = evaluate_result(tmp3)

ob = torch.load("/content/non_ac_classroom_ann_model_object_40_test.pkl")
tmp4 = evaluate_ann(ob, *train_test_split(non_ac_classroom_x, non_ac_classroom_y, test_size=0.4, random_state=42))[1]


tmp3.join(tmp4)
Out[ ]:
LogisticRegression KNN SVM Linear SVM DecisionTree RandomForest XGB LGBM GradientBoosting MLP Neural Net AdaBoost Naive Bayes QDA ANN
Metrics
Train Accuracy 83.85 100.0 97.78 84.96 99.51 100.0 100.0 100.0 100.0 99.51 67.45 83.48 75.59 88.41
F1 Macro 71.88 100.0 95.99 71.27 99.13 100.0 100.0 100.0 100.0 98.89 55.33 77.79 59.82 79.12
F1 Weighted 82.38 100.0 97.78 82.6 99.51 100.0 100.0 100.0 100.0 99.51 66.42 84.76 73.27 88.35
Recall Macro 70.01 100.0 95.95 71.08 99.01 100.0 100.0 100.0 100.0 98.95 52.83 81.19 65.3 79.05
Recall Weighted 83.85 100.0 97.78 84.96 99.51 100.0 100.0 100.0 100.0 99.51 67.45 83.48 75.59 88.41
Precision Macro 78.5 100.0 96.05 87.0 99.28 100.0 100.0 100.0 100.0 98.87 63.59 79.05 61.57 80.14
Precision Weighted 83.23 100.0 97.79 86.97 99.52 100.0 100.0 100.0 100.0 99.52 71.07 89.74 77.17 88.74
Test Accuracy 80.81 95.2 95.57 83.39 96.13 97.42 97.23 97.6 96.49 96.86 64.76 84.5 73.8 87.64
F1 Macro 68.97 92.3 92.94 69.71 93.97 95.75 95.57 95.98 94.72 94.64 54.23 79.72 59.46 79.59
F1 Weighted 79.11 95.16 95.51 80.63 96.11 97.41 97.23 97.58 96.49 96.86 63.45 85.75 71.23 87.61
Recall Macro 69.02 92.08 92.32 70.58 93.72 95.45 95.45 95.69 94.62 94.36 52.03 82.21 64.66 79.67
Recall Weighted 80.81 95.2 95.57 83.39 96.13 97.42 97.23 97.6 96.49 96.86 64.76 84.5 73.8 87.64
Precision Macro 74.75 92.54 93.69 84.74 94.48 96.07 95.71 96.31 94.82 95.05 62.36 80.62 60.39 80.92
Precision Weighted 80.73 95.13 95.53 86.05 96.23 97.41 97.23 97.58 96.48 96.91 68.05 89.78 74.4 88.27
In [ ]:
metric_to_show = 'Accuracy'

tmp3.join(tmp4).loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(marker='o', figsize=(14, 8))
plt.title(f'Train and Test {metric_to_show} for Different Hyper Tuned Models')
plt.xlabel('Model')
plt.ylabel(metric_to_show)
plt.xticks(np.arange(len(tmp3.join(tmp4).columns.to_numpy())), tmp3.join(tmp4).columns.to_numpy(), rotation=90)
plt.legend(loc='best')
plt.grid(True)
plt.show()
No description has been provided for this image
In [ ]:
metric_to_show = 'Accuracy'       # Accuracy, F1 Macro, F1 Weighted, Recall Macro, Recall Weighted, Precision Macro, Precision Weighted

fig, ax = plt.subplots(figsize=(10, 6))

# Plot the first set of data
# result_hyper_tuned.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Train and Test {metric_to_show} for Different Models')
tmp3.join(tmp4).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8), title=f'Test {metric_to_show} for Different Models')

# Plot the second set of data
# result.loc[[('Train', metric_to_show), ('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))
tmp1.join(tmp2).loc[[('Test', metric_to_show)]].T.plot(ax=ax, marker='o', figsize=(14, 8))

# Set labels, legends, and show the plot
ax.set_xlabel('Model')
ax.set_ylabel(metric_to_show)
ax.grid(True)
# ax.legend(['Hyper Tuned Train', 'Hyper Tuned Test', 'Original Train', 'Original Test'])
ax.legend(['Hyper Tuned Test', 'Original Test'])
plt.xticks(np.arange(len(tmp1.join(tmp2).columns.to_numpy())), tmp1.join(tmp2).columns.to_numpy(), rotation=90)
plt.show();
No description has been provided for this image

Each Class Accuracy, Precision, Recall, F1 Score of Prediction¶

In [ ]:
def calculate_class_metrics(obj = None, confusion_matrix_ann = None):
    each_model_each_class_metrics = {}
    res = None
    epsilon = 1e-7  # small constant

    if obj is not None:
        for model in obj.model_names:
            confusion_matrix = obj.get_metric_scores(model)['Test Confusion Matrix']
            num_classes = len(confusion_matrix)
            class_metrics = {}

            for i in range(num_classes):
                TP = confusion_matrix[i, i]
                FP = sum(confusion_matrix[:, i]) - TP
                FN = sum(confusion_matrix[i, :]) - TP
                total_samples = TP + FP + FN

                accuracy = TP / (total_samples + epsilon)
                precision = TP / (TP + FP + epsilon)
                recall = TP / (TP + FN + epsilon)
                f1_score = 2 * (precision * recall) / (precision + recall + epsilon)

                class_metrics[f'Class {i}'] = {'Accuracy (%)': round(accuracy * 100.00, 2), 'Precision (%)': round(precision * 100.00, 2), 'Recall (%)': round(recall * 100.00, 2), 'F1 Score (%)': round(f1_score * 100.00, 2)}
            each_model_each_class_metrics[model] = class_metrics

    if confusion_matrix_ann is not None:
        num_classes = len(confusion_matrix_ann)
        class_metrics = {}
        for i in range(num_classes):
            TP = confusion_matrix_ann[i, i]
            FP = sum(confusion_matrix_ann[:, i]) - TP
            FN = sum(confusion_matrix_ann[i, :]) - TP
            total_samples = TP + FP + FN

            accuracy = TP / (total_samples + epsilon)
            precision = TP / (TP + FP + epsilon)
            recall = TP / (TP + FN + epsilon)
            f1_score = 2 * (precision * recall) / (precision + recall + epsilon)

            class_metrics[f'Class {i}'] = {'Accuracy (%)': round(accuracy * 100.00, 2), 'Precision (%)': round(precision * 100.00, 2), 'Recall (%)': round(recall * 100.00, 2), 'F1 Score (%)': round(f1_score * 100.00, 2)}
        each_model_each_class_metrics['ANN'] = class_metrics

    for model_name in each_model_each_class_metrics:
        tmp = pd.DataFrame(each_model_each_class_metrics[model_name]).T.stack(0).reset_index().rename(columns = {'level_0': 'Class', 'level_1': 'Attributes', 0: model_name}).set_index(['Class', 'Attributes'], drop = True)
        if res is None:
            res = tmp
        else:
            res = res.join(tmp, how='outer')

    return res
In [ ]:
with open("/content/ac_lab_hypertuned_models_object_40_test.pkl", "rb") as file:
    tmp1 = joblib.load(file)

tmp2 = evaluate_ann(torch.load("/content/ac_lab_ann_model_object_40_test.pkl"),
                    *train_test_split(ac_lab_x, ac_lab_y, test_size=0.4, random_state=42))[0]['Test Confusion Matrix']

calculate_class_metrics(tmp1, tmp2)
In [ ]:
with open("/content/ac_classroom_hypertuned_models_object_40_test.pkl", "rb") as file:
    tmp1 = joblib.load(file)

tmp2 = evaluate_ann(torch.load("/content/ac_classroom_ann_model_object_40_test.pkl"),
                    *train_test_split(ac_classroom_x, ac_classroom_y, test_size=0.4, random_state=42))[0]['Test Confusion Matrix']

calculate_class_metrics(tmp1, tmp2)
# calculate_class_accuracy(tmp1, tmp2).query("Attributes == 'Accuracy (%)'")
In [ ]:
with open("/content/non_ac_classroom_hypertuned_models_object_40_test.pkl", "rb") as file:
    tmp1 = joblib.load(file)

tmp2 = evaluate_ann(torch.load("/content/non_ac_classroom_ann_model_object_40_test.pkl"),
                    *train_test_split(non_ac_classroom_x, non_ac_classroom_y, test_size=0.4, random_state=42))[0]['Test Confusion Matrix']

calculate_class_metrics(tmp1, tmp2)
# calculate_class_accuracy(tmp1, tmp2).query("Attributes == 'Accuracy (%)'")